# Transforming Unstructured Data from an AWS S3 bucket into RAG-Ready Data in Singdata Lakehouse

In [1]:
import json
import pandas as pd
import logging
import warnings

# Only surface ERROR-level log records; force=True makes this configuration
# take effect even if the root logger was already configured.
logging.basicConfig(force=True, level=logging.ERROR)
# Suppress UserWarning-category warnings for the rest of the session.
warnings.filterwarnings(action="ignore", category=UserWarning)

# Set drop_tables to True to drop existing table data before new data is
# written; set it to False to keep what is already there.
drop_tables = True

In [2]:
import os
import dotenv

# Load environment variables from the .env file; replace the path below with
# the location of your own .env file.
dotenv.load_dotenv(dotenv_path="./.env")

True

In [3]:
# Table names used to store the data in Lakehouse, together with the chunking
# and embedding settings consumed by the pipeline.
#
# Alternative preset, kept for reference (swap the values below to use it):
#   index_and_table_prefix = "base_512_"
#   raw_table_name = f"{index_and_table_prefix}yunqi_raw_elements"
#   silver_table_name = f"{index_and_table_prefix}yunqi_elements"
#   embeddings_dimensions = 768
#   chunk_max_characters = 512
#   chunk_overlap = 200
#   embedding_provider = "huggingface"
#   embedding_model_name = "BAAI/bge-base-zh-v1.5"

# Active preset: embedding model and vector width.
embedding_provider, embedding_model_name = "huggingface", "BAAI/bge-m3"
embeddings_dimensions = 1024

# Active preset: chunk size and overlap, in characters.
chunk_max_characters, chunk_overlap = 2048, 512

# Shared prefix for both table names (also reused for index names in the DDL).
index_and_table_prefix = "m3_1024_2048_20250510_"
raw_table_name = f"{index_and_table_prefix}yunqi_raw_elements"
silver_table_name = f"{index_and_table_prefix}yunqi_elements"


In [4]:
# Connection parameters for Singdata Lakehouse, read from the environment.
# Each value is None when the corresponding variable is not set.
(
    _username,
    _password,
    _service,
    _instance,
    _workspace,
    _schema,
    _vcluster,
) = (
    os.getenv("cz_username"),
    os.getenv("cz_password"),
    os.getenv("cz_service"),
    os.getenv("cz_instance"),
    os.getenv("cz_workspace"),
    os.getenv("cz_schema"),
    os.getenv("cz_vcluster"),
)

In [5]:
# DDL for the raw table, i.e. the table the ingestion pipeline's uploader is
# configured to write to (table_name=raw_table_name).
# This is an f-string: {_schema}, {raw_table_name} and {embeddings_dimensions}
# are interpolated when the cell runs, so the cells that define them must have
# been executed first.
# NOTE(review): the column list ends with a trailing comma before ");".
# Confirm that the Lakehouse SQL dialect accepts this.
# NOTE(review): the last column is named documents_original_source here,
# while the silver table names its last column documents_source.
raw_table_ddl = f"""
CREATE TABLE IF NOT EXISTS {_schema}.{raw_table_name} (
    id STRING, -- Auto-increment sequence
    record_locator STRING,
    type STRING,
    record_id STRING, -- Record identifier from the data source (e.g., record locator in connector metadata)
    element_id STRING, -- Unique identifier for the element (SHA-256 or UUID)
    filetype STRING, -- File type (e.g., PDF, DOCX, EML, etc.)
    file_directory STRING, -- Directory where the file is located
    filename STRING, -- File name
    last_modified TIMESTAMP, -- Last modified time of the file
    languages STRING, -- Document language, supports a list of multiple languages
    page_number STRING, -- Page number (applicable for PDF, DOCX, etc.)
    text STRING, -- Extracted text content
    embeddings VECTOR({embeddings_dimensions}), -- Vector data
    parent_id STRING, -- Parent element ID, used to represent element hierarchy
    is_continuation BOOLEAN, -- Whether it is a continuation of the previous element (used in chunking)
    orig_elements STRING, -- Original element in JSON format (used to store the complete element structure)
    element_type STRING, -- Element type (e.g., NarrativeText, Title, Table, etc.)
    coordinates STRING, -- Element coordinates (stored in JSONB format)
    link_texts STRING, -- Added field: Link text
    link_urls STRING, -- Added field: Link URL
    email_message_id STRING, -- Added field: Email message ID
    sent_from STRING, -- Added field: Sender
    sent_to STRING, -- Added field: Recipient
    subject STRING, -- Added field: Subject
    url STRING, -- Added field: URL
    version STRING, -- Added field: Version
    date_created TIMESTAMP, -- Added field: Creation date
    date_modified TIMESTAMP, -- Added field: Modification date
    date_processed TIMESTAMP, -- Added field: Processing date
    text_as_html STRING, -- Added field: Text in HTML format
    emphasized_text_contents STRING,
    emphasized_text_tags STRING,
    documents_original_source STRING, -- Added field: Document source
);
"""

# DDL for the silver table. Its columns mirror the raw table, except that the
# last column is named documents_source, and it declares two indexes whose
# names are prefixed with index_and_table_prefix:
#   - an inverted index on `text` (analyzer 'unicode');
#   - a vector index on `embeddings` (f32 scalars, cosine distance).
# This is an f-string: {_schema}, {silver_table_name}, {embeddings_dimensions}
# and {index_and_table_prefix} are interpolated when the cell runs.
silver_table_ddl = f"""
CREATE TABLE IF NOT EXISTS {_schema}.{silver_table_name} (
    id STRING, -- Auto-increment sequence
    record_locator STRING,
    type STRING,
    record_id STRING, -- Record identifier from the data source (e.g., record locator in connector metadata)
    element_id STRING, -- Unique identifier for the element (SHA-256 or UUID)
    filetype STRING, -- File type (e.g., PDF, DOCX, EML, etc.)
    file_directory STRING, -- Directory where the file is located
    filename STRING, -- File name
    last_modified TIMESTAMP, -- Last modified time of the file
    languages STRING, -- Document language, supports a list of multiple languages
    page_number STRING, -- Page number (applicable for PDF, DOCX, etc.)
    text STRING, -- Extracted text content
    embeddings vector({embeddings_dimensions}), -- Vector data
    parent_id STRING, -- Parent element ID, used to represent element hierarchy
    is_continuation BOOLEAN, -- Whether it is a continuation of the previous element (used in chunking)
    orig_elements STRING, -- Original element in JSON format (used to store the complete element structure)
    element_type STRING, -- Element type (e.g., NarrativeText, Title, Table, etc.)
    coordinates STRING, -- Element coordinates (stored in JSONB format)
    link_texts STRING, -- Added field: Link text
    link_urls STRING, -- Added field: Link URL
    email_message_id STRING, -- Added field: Email message ID
    sent_from STRING, -- Added field: Sender
    sent_to STRING, -- Added field: Recipient
    subject STRING, -- Added field: Subject
    url STRING, -- Added field: URL
    version STRING, -- Added field: Version
    date_created TIMESTAMP, -- Added field: Creation date
    date_modified TIMESTAMP, -- Added field: Modification date
    date_processed TIMESTAMP, -- Added field: Processing date
    text_as_html STRING, -- Added field: Text in HTML format
    emphasized_text_contents STRING,
    emphasized_text_tags STRING,
    documents_source STRING, -- Added field: Document source
    INDEX {index_and_table_prefix}inverted_text_index_yunqi_cn (text) INVERTED  PROPERTIES('analyzer'='unicode'),
    INDEX {index_and_table_prefix}embeddings_vec_index_yunqi_cn(embeddings) USING vector properties (
        "scalar.type" = "f32",
        "distance.function" = "cosine_distance")
);
"""

# Statement that fills the silver table from the raw table with
# INSERT OVERWRITE ... SELECT. Columns are selected in the same order as the
# silver table's column list; `embeddings` is cast to
# VECTOR({embeddings_dimensions}) and `documents_source` is set to a fixed URL.
# NOTE(review): the fixed URL is used instead of the raw table's
# documents_original_source column, and the same URL is also passed to the
# uploader config. Confirm that this duplication is intended.
clean_transformation_data_sql = f"""
INSERT overwrite {_schema}.{silver_table_name}
SELECT 
    id, 
    record_locator, 
    type, 
    record_id, 
    element_id, 
    filetype, 
    file_directory, 
    filename, 
    last_modified, 
    languages, 
    page_number, 
    text, 
    CAST(embeddings AS VECTOR({embeddings_dimensions})) AS embeddings, 
    parent_id, 
    is_continuation, 
    orig_elements, 
    element_type, 
    coordinates, 
    link_texts, 
    link_urls, 
    email_message_id, 
    sent_from, 
    sent_to, 
    subject, 
    url, 
    version, 
    date_created, 
    date_modified, 
    date_processed, 
    text_as_html,
    emphasized_text_contents, 
    emphasized_text_tags,
    "https://yunqi.tech/documents" as documents_source
FROM {_schema}.{raw_table_name};
"""

In [6]:
# Define the function to create the connection to Singdata Lakehouse.
from clickzetta.connector import connect
import pandas as pd
def get_connection(password, username, service, instance, workspace, schema, vcluster):
    """Open a connection to Singdata Lakehouse.

    Every argument is forwarded unchanged, by keyword, to ``connect``
    (imported from ``clickzetta.connector``).

    Returns:
        Whatever ``connect`` returns for the given parameters.
    """
    connect_kwargs = dict(
        password=password,
        username=username,
        service=service,
        instance=instance,
        workspace=workspace,
        schema=schema,
        vcluster=vcluster,
    )
    return connect(**connect_kwargs)

In [7]:
# Open the Singdata Lakehouse connection using the parameters read from the
# environment.
conn = get_connection(
    password=_password,
    username=_username,
    service=_service,
    instance=_instance,
    workspace=_workspace,
    schema=_schema,
    vcluster=_vcluster,
)

In [8]:
# Helper for running a single SQL statement on an open connection.
def excute_sql(conn, sql_statement: str):
    """Execute one SQL statement and return all rows it produces.

    Args:
        conn: Connection object exposing ``cursor()``; the cursor is used as a
            context manager.
        sql_statement: The SQL text to execute.

    Returns:
        The value of ``cursor.fetchall()`` for the executed statement.
    """
    cursor_context = conn.cursor()
    with cursor_context as cursor:
        cursor.execute(sql_statement)
        rows = cursor.fetchall()
    return rows

In [9]:
# When drop_tables is set, drop the raw table. The equivalent drop for the
# silver table is left disabled; to drop it as well, run:
#   excute_sql(conn, f"DROP TABLE IF EXISTS {_schema}.{silver_table_name}")
if drop_tables:
    drop_raw_table_sql = f"DROP TABLE IF EXISTS {_schema}.{raw_table_name}"
    excute_sql(conn, drop_raw_table_sql)

In [10]:
# Create the raw and silver tables in Singdata Lakehouse. Both statements use
# CREATE TABLE IF NOT EXISTS; the result of the second call is the cell output.
excute_sql(conn=conn, sql_statement=raw_table_ddl)
excute_sql(conn=conn, sql_statement=silver_table_ddl)

[['OPERATION SUCCEED']]

### PDFs/Images/Emails ingestion and preprocessing pipeline

In [11]:
from unstructured_ingest.interfaces import ProcessorConfig
from unstructured_ingest.pipeline.pipeline import Pipeline
from unstructured_ingest.processes.chunker import ChunkerConfig
from unstructured_ingest.processes.connectors.fsspec.s3 import (
    S3ConnectionConfig,
    S3DownloaderConfig,
    S3IndexerConfig,
    S3AccessConfig,
)
from unstructured_ingest.processes.connectors.local import (
    LocalIndexerConfig,
    LocalDownloaderConfig,
    LocalConnectionConfig
)
from unstructured_ingest.processes.embedder import EmbedderConfig
from unstructured_ingest.processes.partitioner import PartitionerConfig

from unstructured_ingest.processes.connectors.sql.clickzetta import (
    ClickzettaConnectionConfig,
    ClickzettaAccessConfig,
    ClickzettaUploadStagerConfig,
    ClickzettaUploaderConfig
)

In [12]:
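# Optional: uncomment the next line to clear the cached unstructured-ingest pipeline files before a re-run.
# !rm -rf /Users/liangmo/.cache/unstructured/ingest/pipeline/*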

In [13]:
# Show the input directory the pipeline will index (None if the variable is unset).
os.environ.get("LOCAL_FILE_INPUT_DIR")

'/Users/liangmo/yunqidoc/cn_markdown_20250526'

In [14]:
# Pipeline-wide execution settings.
processor_context = ProcessorConfig(verbose=True, tqdm=True, num_processes=8)

# Source: every file matched by "**/*" under LOCAL_FILE_INPUT_DIR, searched
# recursively on the local filesystem.
local_indexer_config = LocalIndexerConfig(
    input_path=os.getenv("LOCAL_FILE_INPUT_DIR"),
    file_glob="**/*",
    recursive=True,
)
local_downloader_config = LocalDownloaderConfig()
local_connection_config = LocalConnectionConfig()

# Partitioning with partition_by_api=False. The API key and endpoint are
# still passed through; presumably they are unused in this mode (confirm).
partitioner_config = PartitionerConfig(
    partition_by_api=False,
    api_key=os.getenv("UNSTRUCTURED_API_KEY"),
    partition_endpoint=os.getenv("UNSTRUCTURED_URL"),
    strategy="hi_res",
    additional_partition_args={
        "split_pdf_page": True,
        "split_pdf_allow_failed": True,
        "split_pdf_concurrency_level": 8,
    },
)

# Chunking by title, sized by the settings from the configuration cell.
chunker_config = ChunkerConfig(
    chunking_strategy="by_title",
    chunk_max_characters=chunk_max_characters,
    chunk_overlap=chunk_overlap,
    chunk_combine_text_under_n_chars=200,
)

# Embedding provider and model from the configuration cell.
embedder_config = EmbedderConfig(
    embedding_provider=embedding_provider,
    embedding_model_name=embedding_model_name,
)

# Destination: the raw table in Singdata Lakehouse.
clickzetta_connection_config = ClickzettaConnectionConfig(
    access_config=ClickzettaAccessConfig(password=_password),
    username=_username,
    service=_service,
    instance=_instance,
    workspace=_workspace,
    schema=_schema,
    vcluster=_vcluster,
)
clickzetta_stager_config = ClickzettaUploadStagerConfig()
clickzetta_uploader_config = ClickzettaUploaderConfig(
    table_name=raw_table_name,
    documents_original_source="https://yunqi.tech/documents",
)

pipeline = Pipeline.from_configs(
    context=processor_context,
    indexer_config=local_indexer_config,
    downloader_config=local_downloader_config,
    source_connection_config=local_connection_config,
    partitioner_config=partitioner_config,
    chunker_config=chunker_config,
    embedder_config=embedder_config,
    destination_connection_config=clickzetta_connection_config,
    stager_config=clickzetta_stager_config,
    uploader_config=clickzetta_uploader_config,
)

pipeline.run()

2025-05-26 12:40:39,464 MainProcess INFO     created indexer with configs: {"input_path":"/Users/liangmo/yunqidoc/cn_markdown_20250526","recursive":true}, connection configs: {"access_config":"**********"}
2025-05-26 12:40:39,464 MainProcess INFO     Created download with configs: {"download_dir":null}, connection configs: {"access_config":"**********"}
2025-05-26 12:40:39,465 MainProcess INFO     created partition with configs: {"strategy":"hi_res","ocr_languages":null,"encoding":null,"additional_partition_args":{"split_pdf_page":true,"split_pdf_allow_failed":true,"split_pdf_concurrency_level":8},"skip_infer_table_types":null,"fields_include":["element_id","text","type","metadata","embeddings"],"flatten_metadata":false,"metadata_exclude":[],"element_exclude":[],"metadata_include":[],"partition_endpoint":"https://api.unstructuredapp.io/general/v0/general","partition_by_api":false,"api_timeout_ms":null,"api_key":"**********","hi_res_model_name":null,"raise_unsupported_filetype":false}
2

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
2025-05-26 12:40:49,422 MainProcess INFO     indexer finished in 4.5e-05s
2025-05-26 12:40:49,508 MainProcess DEBUG    generated file data: {"identifier":"/Users/liangmo/yunqidoc/cn_markdown_20250526/drop-external-schema.md","connector_type":"local","source_identifiers":{"filename":"drop-external-schema.md","fullpath":"/Users/liangmo/yunqidoc/cn_markdown_20250526/drop-external-schema.md","rel_path":"drop-external-schema.md"},"metadata":{"url":null,"version":null,"record_locator":{"path":"/Users/liangmo/yunqidoc/cn_markdown_20250526/drop-external-schema.md"},"date_created":"1748227661.3727186","date_modified":"1748227661.3727562","date_processed":"1748234449.508506","permissions_data":[{"mode":33188}],"filesi

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'param

2025-05-26 13:05:37,394 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/28afb76e7309.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:37,398 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/39d8222ce857.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:37,436 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/30ba797226c3.json not detected as batch file data
Removed trailing se

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 13:05:38,683 SpawnPoolWorker-37 DEBUG    upload finished in 1.285563s, attributes: file_id=39d8222ce857
2025-05-26 13:05:38,684 SpawnPoolWorker-37 DEBUG    upload finished in 1.286232s, attributes: file_id=39d8222ce857
upload:   0%|          | 2/1056 [00:03<27:37,  1.57s/it]2025-05-26 13:05:38,685 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2c3d5e6b3641.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:38,766 SpawnPoolWorker-36 DEBUG    upload finished in 1.438914s, attributes: file_id=928e4a988d51
2025-05-26 13:05:38,766 SpawnPoolWorker-36 DEBUG    upload finished in 1.439889s, attributes: file_id=928e4a988d51
2025-05-26 13:05:38,767 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:39,036 SpawnPoolWorker-40 DEBUG    upload finished in 0.708983s, attributes: file_id=62e679a0b27e
2025-05-26 13:05:39,037 SpawnPoolWorker-40 DEBUG    upload finished in 0.709768s, attributes: file_id=62e679a0b27e
upload:   1%|          | 7/1056 [00:04<05:24,  3.23it/s]2025-05-26 13:05:39,038 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9611b8eaa985.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:39,111 SpawnPoolWorker-35 DEBUG    upload finished in 1.732318s, attributes

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:39,534 SpawnPoolWorker-41 DEBUG    upload finished in 0.706275s, attributes: file_id=da852cae5692
2025-05-26 13:05:39,537 SpawnPoolWorker-41 DEBUG    upload finished in 0.70903s, attributes: file_id=da852cae5692
2025-05-26 13:05:39,540 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/24d29fbac1bc.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:39,641 SpawnPoolWorker-34 DEBUG    upload finished in 0.795114s, attributes: file_id=f1f095427b1d
2025-05-26 13:05:39,642 SpawnPoolWorker-34 DEBUG    upload finished in 0.796061s, attributes: file_id=f1f095427b1d
upload:   1%|          | 11/1056 [00:04<03:59,  4.37it/s]2025-05-26 13:05:39,6

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 13:05:39,758 SpawnPoolWorker-39 DEBUG    upload finished in 0.930408s, attributes: file_id=8f6754bb56da
2025-05-26 13:05:39,759 SpawnPoolWorker-39 DEBUG    upload finished in 0.931306s, attributes: file_id=8f6754bb56da
upload:   1%|          | 12/1056 [00:04<03:36,  4.83it/s]2025-05-26 13:05:39,762 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f9d1f85f2d19.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:39,924 SpawnPoolWorker-36 DEBUG    upload finished in 1.156279s, attributes: file_id=282224d85f28
2025-05-26 13:05:39,924 SpawnPoolWorker-36 DEBUG    upload finished in 1.15696s, attributes: file_id=282224d85f28
upload:   1%|        

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:05:40,046 SpawnPoolWorker-37 DEBUG    upload finished in 1.360569s, attributes: file_id=2c3d5e6b3641
2025-05-26 13:05:40,046 SpawnPoolWorker-37 DEBUG    upload finished in 1.361221s, attributes: file_id=2c3d5e6b3641
upload:   1%|▏         | 14/1056 [00:05<03:07,  5.55it/s]2025-05-26 13:05:40,048 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e1d156986d6a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:40,141 SpawnPoolWorker-40 DEBUG    upload finished in 1.102874s, attributes: file_id=9611b8eaa985
2025-05-26 13:05:40,141 Spawn

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:40,290 SpawnPoolWorker-38 DEBUG    upload finished in 1.133316s, attributes: file_id=9618f566a8db
2025-05-26 13:05:40,290 SpawnPoolWorker-38 DEBUG    upload finished in 1.133851s, attributes: file_id=9618f566a8db
2025-05-26 13:05:40,291 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/30ce2ed2e990.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 13:05:40,658 SpawnPoolWorker-41 DEBUG    upload finished in 1.118478s, attributes: file_id=24d29fbac1bc
2025-05-26 13:05:40,658 SpawnPoolWorker-41 DEBUG    upload finished in 1.119281s, attributes: file_id=24d29fbac1bc
upload:   2%|▏         | 18/1056 [00:05<02:56,  5.88it/s]2025-05-26 13:05:40,660 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/14b35af5b6f4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:40,795 SpawnPoolWorker-36 DEBUG    upload finished in 0.86982s, attributes: file_id=d1aa326fbb91
2025-05-26 13:05:40,796 SpawnPoolWorker-36 DEBUG    upload finished in 0.870376s, attributes: file_id=d1aa326fbb91
upload:   2%|▏         | 19/1056 [00:05<02:49,  6.12it/s]2025-05-26 13:05:

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 13:05:40,867 SpawnPoolWorker-37 DEBUG    upload finished in 0.819308s, attributes: file_id=e1d156986d6a
2025-05-26 13:05:40,868 SpawnPoolWorker-37 DEBUG    upload finished in 0.820177s, attributes: file_id=e1d156986d6a
2025-05-26 13:05:40,870 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0d739f0d02b6.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:40,933 SpawnPoolWorker-40 DEBUG    upload finished in 0.790345s, attributes: file_id=1ef697619383
2025-05-26 13:05:40,933 SpawnPoolWorker-40 DEBUG    upload finished in 0.791022s, attributes: file_id=1ef697619383
upload:   2%|▏         |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:41,090 SpawnPoolWorker-39 DEBUG    upload finished in 1.328451s, attributes: file_id=f9d1f85f2d19
2025-05-26 13:05:41,090 SpawnPoolWorker-39 DEBUG    upload finished in 1.329208s, attributes: file_id=f9d1f85f2d19
upload:   2%|▏         | 24/1056 [00:06<01:37, 10.57it/s]2025-05-26 13:05:41,092 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fcb6a179ed57.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:41,148 SpawnPoolWorker-38 DEBUG    upload finished in 0.85666s, attributes: file_id=30ce2ed2e990
2025-05-26 13:05:41,148 SpawnP

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 13:05:41,514 SpawnPoolWorker-36 DEBUG    upload finished in 0.717374s, attributes: file_id=2ffc2f8c056f
2025-05-26 13:05:41,515 SpawnPoolWorker-36 DEBUG    upload finished in 0.718263s, attributes: file_id=2ffc2f8c056f
upload:   2%|▏         | 26/1056 [00:06<02:13,  7.72it/s]2025-05-26 13:05:41,517 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9b40f78dadc2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:41,567 SpawnPoolWorker-35 DEBUG    upload finished in 0.59465s, attributes: file_id=7f2e56dd3c7f
2025-05-26 13:05:41,568 SpawnPoolWorker-35 DEBUG    upload finished in 0.595274s, attributes: file_id=7f2e56dd3c7f
2025-05-26 13:05:41,569 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:41,770 SpawnPoolWorker-40 DEBUG    upload finished in 0.83497s, attributes: file_id=8726c433e812
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:41,770 SpawnPoolWorker-40 DEBUG    upload finished in 0.835629s, attributes: file_id=8726c433e812
2025-05-26 13:05:41,773 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d7174cb6c95e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:41,853 SpawnPoolWorker-38 DEBUG    upload finished in 0.703952s, attributes: file_id=c815b90e87b6
2025-05-26 13:05:41,853 SpawnPoolWorker-38 DEBUG    upload finished in 0.704475s, attributes: file_id=c815b90e87b6
upload:   3%|▎         | 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:41,991 SpawnPoolWorker-34 DEBUG    upload finished in 0.968811s, attributes: file_id=6f8221c729f3
2025-05-26 13:05:41,992 SpawnPoolWorker-34 DEBUG    upload finished in 0.969409s, attributes: file_id=6f8221c729f3
upload:   3%|▎         | 33/1056 [00:06<01:26, 11.77it/s]2025-05-26 13:05:41,993 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6777617ff215.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:42,387 SpawnPoolWorker-36 DEBUG    upload finished in 0.86969s, attributes: file_id=9b40f78dadc2
2025-05-26 13:05:42,387 SpawnPoolWorker-36 DEBUG    upload finished in 0.870423s, attributes: file_id=9b40f78dadc2
2025-05-26 13:05:42,389 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/dd7c2134dd4a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:42,455 SpawnPoolWorker-35 DEBUG    upload finished in 0.88588s, attributes: file_id=d76f1ee9a93a
2025-05-26 13:05:42,455 SpawnPoolWorker-35 DEBUG    upload finished in 0.886464s, attributes: file_id=d76f1ee9a93a
upload:   3%|▎         | 35/1056 [00:07<02:07,  8.01it/s]2025-05-26 13:05:42,457 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/u

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:42,650 SpawnPoolWorker-39 DEBUG    upload finished in 0.728949s, attributes: file_id=3b77de03c19d
2025-05-26 13:05:42,651 SpawnPoolWorker-39 DEBUG    upload finished in 0.729465s, attributes: file_id=3b77de03c19d
upload:   4%|▎         | 37/1056 [00:07<01:59,  8.53it/s]2025-05-26 13:05:42,653 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a9743bffe8ca.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:42,704 SpawnPoolWorker-38 DEBUG    upload finished in 0.849266s, attributes: file_id=e83057c7f917
2025-05-26 13:05:42,704 SpawnPoolWorker-38 DEBUG    upload finished in 0.849686s, attributes: file_id=e83057c7f917
2025-05-26 13:05:42,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:42,925 SpawnPoolWorker-34 DEBUG    upload finished in 0.931812s, attributes: file_id=6777617ff215
2025-05-26 13:05:42,925 SpawnPoolWorker-34 DEBUG    upload finished in 0.932397s, attributes: file_id=6777617ff215
upload:   4%|▍         | 41/1056 [00:07<01:36, 10.56it/s]2025-05-26 13:05:42,927 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f36bc8fa829b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:43,114 SpawnPoolWorker-36 DEBUG    upload finished in 0.724928s, attributes: file_id=dd7c2134dd4a
2025-05-26 13:05:43,114 Spawn

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:43,190 SpawnPoolWorker-35 DEBUG    upload finished in 0.73283s, attributes: file_id=d6f74c45c704
2025-05-26 13:05:43,190 SpawnPoolWorker-35 DEBUG    upload finished in 0.733374s, attributes: file_id=d6f74c45c704
upload:   4%|▍         | 43/1056 [00:08<01:47,  9.46it/s]2025-05-26 13:05:43,191 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/cd7f4fb1aec5.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:43,273 SpawnPoolWorker-40 DEBUG    upload finished in 0.720211s, attributes: file_id=5fa317ef2afa
2025-05-26 13:05:43,274 SpawnPoolWorker-40 DEBUG    upload finished in 0.720663s, attributes: file_id=5fa317ef2afa
2025-05-26 13:05:43,275 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9467de9f03ab.json not detected as batch file data
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
202

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:43,432 SpawnPoolWorker-38 DEBUG    upload finished in 0.726981s, attributes: file_id=9496c96e5625
2025-05-26 13:05:43,432 SpawnPoolWorker-38 DEBUG    upload finished in 0.727454s, attributes: file_id=9496c96e5625
2025-05-26 13:05:43,434 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d6dfee6e4a1e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:43,554 SpawnPoolWorker-37 DEBUG    upload finished in 0.796022s, attributes: file_id=656ad8567d1f
2025-05-26 13:05:43,554 SpawnPoolWorker-37 DEBUG    upload finished in 0.796557s, attributes: file_id=656ad8567d1f
upload:   4%|▍         |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:05:43,662 SpawnPoolWorker-34 DEBUG    upload finished in 0.735222s, attributes: file_id=f36bc8fa829b
2025-05-26 13:05:43,662 SpawnPoolWorker-34 DEBUG    upload finished in 0.735662s, attributes: file_id=f36bc8fa829b
2025-05-26 13:05:43,663 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/95e6845c6a4f.json not detected as batch file data
2025-05-26 13:05:43,688 SpawnPoolWorker-41 DEBUG    upload finished in 0.845737s, attributes: file_id=f60235c41bb6
2025-05-26 13:05:43,688 SpawnPoolWorker-41 DEBUG    upload finished in 0.846205s, attributes: file_id=f60235c41bb6
upload:   5%|▍         | 49/1056 [00:08<01:29, 11.27it/s]2025-05-26 13:05:43,689 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ec27297bcb6a.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:43,796 SpawnPoolWorker-36 DEBUG    upl

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:43,857 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ec19a965c2c4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:43,922 SpawnPoolWorker-35 DEBUG    upload finished in 0.730874s, attributes: file_id=cd7f4fb1aec5
2025-05-26 13:05:43,922 SpawnPoolWorker-35 DEBUG    upload finished in 0.731352s, attributes: file_id=cd7f4fb1aec5
2025-05-26 13:05:43,924 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e120653f79d4.json not detected as batch file data
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:44,124 SpawnPoolWorker-39 DEBUG    upload finished in 0.743037s, attributes: file_id=b04c0ba0d9c2
2025-05-26 13:05:44,124 SpawnPoolWorker-39 DEBUG    upload finished in 0.743502s, attributes: file_id=b04c0ba0d9c2
upload:   5%|▌         | 53/1056 [00:09<01:41,  9.87it/s]2025-05-26 13:05:44,126 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fea79d25f7ab.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:44,190 SpawnPoolWorker-37 DEBUG    upload finished in 0.634573s, attributes: file_id=0986ea88e197
2025-05-26 13:05:44,190 SpawnPoolWorker-37 DEBUG    upload finished in 0.635013s, attributes: file_id=0986ea88e197
2025-05-26 13:05:44,192 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:44,336 SpawnPoolWorker-41 DEBUG    upload finished in 0.647044s, attributes: file_id=ec27297bcb6a
2025-05-26 13:05:44,337 SpawnPoolWorker-41 DEBUG    upload finished in 0.647541s, attributes: file_id=ec27297bcb6a
2025-05-26 13:05:44,338 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/97069fb7205f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Remov

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:44,564 SpawnPoolWorker-36 DEBUG    upload finished in 0.766141s, attributes: file_id=30173c8f1ce3
2025-05-26 13:05:44,564 SpawnPoolWorker-36 DEBUG    upload finished in 0.766672s, attributes: file_id=30173c8f1ce3
upload:   5%|▌         | 58/1056 [00:09<01:34, 10.54it/s]2025-05-26 13:05:44,566 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ad5b65f45a71.json not detected as batch file data
2025-05-26 13:05:44,590 SpawnPoolWorker-35 DEBUG    upload finished in 0.666732s, attributes: file_id=e120653f79d4
2025-05-26 13:05:44,590 SpawnPoolWorker-35 DEBUG    upload finished in 0.667238s, attributes: file_id=e120653f79d4
2025-05-26 13:05:44,592 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7253843cb9ee.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:44,900 SpawnPoolWorker-39 DEBUG    upload finished in 0.774229s, attributes: file_id=fea79d25f7ab
2025-05-26 13:05:44,900 SpawnPoolWorker-39 DEBUG    upload finished in 0.774781s, attributes: file_id=fea79d25f7ab
2025-05-26 13:05:44,902 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/11b608392436.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:44,999 SpawnPoolWorker-38 DEBUG    upload finished in 0.728199s, attributes: file_id=fe5a54087049
2025-05-26 13:05:44,999 SpawnPoolWorker-38 DEBUG    upload finished in 0.728872s, attributes: file_id=fe5a54087049
upload:   6%|▌         | 62/1056 [00:09<01:45,  9.44it/s]2025-05-26 13:05:45,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:45,167 SpawnPoolWorker-34 DEBUG    upload finished in 0.842587s, attributes: file_id=cef214606a1a
2025-05-26 13:05:45,167 SpawnPoolWorker-34 DEBUG    upload finished in 0.843128s, attributes: file_id=cef214606a1a
upload:   6%|▌         | 64/1056 [00:10<01:38, 10.04it/s]2025-05-26 13:05:45,168 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/58d725616b0c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:45,232 SpawnPoolWorker-41 DEBUG    upload finished in 0.894223s, attributes: file_id=97069fb7205f
2025-05-26 13:05:45,232 SpawnPoolWorker-41 DEBUG    upload finished in 0.894724s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:45,371 SpawnPoolWorker-35 DEBUG    upload finished in 0.779388s, attributes: file_id=7253843cb9ee
2025-05-26 13:05:45,371 SpawnPoolWorker-35 DEBUG    upload finished in 0.779864s, attributes: file_id=7253843cb9ee
2025-05-26 13:05:45,373 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d98912d2b7c8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:45,436 SpawnPoolWorker-40 DEBUG    upload finished in 0.758923s, attributes: file_id=aa28d1f2d64f
2025-05-26 13:05:45,436 SpawnPoolWorker-40 DEBUG    upload finished in 0.759319s, attributes: file_id=aa28d1f2d64f
upload:   6%|▋         |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:45,614 SpawnPoolWorker-39 DEBUG    upload finished in 0.712779s, attributes: file_id=11b608392436
2025-05-26 13:05:45,615 SpawnPoolWorker-39 DEBUG    upload finished in 0.713387s, attributes: file_id=11b608392436
2025-05-26 13:05:45,616 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/604c425991c6.json not detected as batch file data
2025-05-26 13:05:45,692 SpawnPoolWorker-38 DEBUG    upload finished in 0.691024s, attributes: file_id=b66015eb79b4
2025-05-26 13:05:45,692 SpawnPoolWorker-38 DEBUG    upload finished in 0.691518s, attributes: file_id=b66015eb79b4
upload:   7%|▋         | 70/1056 [00:10<01:34, 10.40it/s]2025-05-26 13:05:45,694 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2034589fdf31.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:45,967 SpawnPoolWorker-34 DEBUG    upload finished in 0.799074s, attributes: file_id=58d725616b0c
2025-05-26 13:05:45,967 SpawnPoolWorker-34 DEBUG    upload finished in 0.799529s, attributes: file_id=58d725616b0c
upload:   7%|▋         | 72/1056 [00:10<01:46,  9.22it/s]2025-05-26 13:05:45,969 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/98d7a2e3b026.json not detected as batch file data
2025-05-26 13:05:46,070 SpawnPoolWorker-41 DEBUG    upload finished in 0.836576s, attributes: file_id=4eb1a52ab794
2025-05-26 13:05:46,070 SpawnPoolWorker-41 DEBUG    upload finished in 0.837025s, attributes: file_id=4eb1a52ab794
2025-05-26 13:05:46,071 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c11a7bff253c.json not detected as batch file data
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:46,173 SpawnPoolWorker-40 DEBUG    upload finished in 0.735858s, attributes: file_id=ba04d4fe054d
2025-05-26 13:05:46,174 SpawnPoolWorker-40 DEBUG    upload finished in 0.736674s, attributes: file_id=ba04d4fe054d
upload:   7%|▋         | 74/1056 [00:11<01:44,  9.36it/s]2025-05-26 13:05:46,176 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4e8cf0f37e5f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:46,357 SpawnPoolWorker-39 DEBUG    upload finished in 0.741393s, attributes: file_id=604c425991c6
2025-05-26 13:05:46,358 SpawnPoolWorker-39 DEBUG    upload finished in 0.741848s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:46,460 SpawnPoolWorker-35 DEBUG    upload finished in 1.087749s, attributes: file_id=d98912d2b7c8
2025-05-26 13:05:46,460 SpawnPoolWorker-35 DEBUG    upload finished in 1.088235s, attributes: file_id=d98912d2b7c8
upload:   7%|▋         | 76/1056 [00:11<01:55,  8.49it/s]2025-05-26 13:05:46,462 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b2599b668ae7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:46,549 SpawnPoolWorker-36 DEBUG    upload finished in 1.229137s, attributes: file_id=2f021fafb356
2025-05-26 13:05:46,549 SpawnPoolWorker-36 DEBUG    upload finished in 1.229699s, attributes: file_id=2f021fafb356
2025-05-26 13:05:46,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:46,680 SpawnPoolWorker-34 DEBUG    upload finished in 0.711142s, attributes: file_id=98d7a2e3b026
2025-05-26 13:05:46,680 SpawnPoolWorker-34 DEBUG    upload finished in 0.711711s, attributes: file_id=98d7a2e3b026
2025-05-26 13:05:46,681 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e87d79311990.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:46,745 SpawnPoolWorker-37 DEBUG    upload finished in 0.971579s, attributes: file_id=1833f8f69368
2025-05-26 13:05:46,745 SpawnPoolWorker-37 DEBUG    upload finished in 0.972096s, attributes: file_id=1833f8f69368
upload:   8%|▊         | 80/1056 [00:11<01:31, 10.71it/s]2025-05-26 13:05:46,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:46,908 SpawnPoolWorker-40 DEBUG    upload finished in 0.732143s, attributes: file_id=4e8cf0f37e5f
2025-05-26 13:05:46,908 SpawnPoolWorker-40 DEBUG    upload finished in 0.732702s, attributes: file_id=4e8cf0f37e5f
upload:   8%|▊         | 82/1056 [00:11<01:27, 11.14it/s]2025-05-26 13:05:46,909 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d16c9bc85580.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:47,178 SpawnPoolWorker-35 DEBUG    upload finished in 0.716386s, attributes: file_id=b2599b668ae7
2025-05-26 13:05:47,178 SpawnPoolWorker-35 DEBUG    upload finished in 0.71694s, attributes: file_id=b2599b668ae7
2025-05-26 13:05:47,180 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8f8cde39f03d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:47,295 SpawnPoolWorker-38 DEBUG    upload finished in 0.674551s, attributes: file_id=c1cfa7e66d73
2025-05-26 13:05:47,295 SpawnPoolWorker-38 DEBUG    upload finished in 0.675045s, attributes: file_id=c1cfa7e66d73
upload:   8%|▊         | 84/1056 [00:12<01:57,  8.27it/s]2025-05-26 13:05:47,297 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:47,396 SpawnPoolWorker-39 DEBUG    upload finished in 1.037135s, attributes: file_id=75b9bc9f4a93
2025-05-26 13:05:47,396 SpawnPoolWorker-39 DEBUG    upload finished in 1.037716s, attributes: file_id=75b9bc9f4a93
upload:   8%|▊         | 85/1056 [00:12<01:53,  8.52it/s]2025-05-26 13:05:47,398 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/00d8e43b8a7c.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:47,432 SpawnPoolWorker-34 DEBUG    upload finished in 0.751066s, attributes: file_id=e87d79311990
2025-05-26 13:05:47,434 SpawnPoolWorker-34 DEBUG    upload finished in 0.753517s, attributes: file_id=e87d79311990
2025-05-26 13:05:47,448 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fe482e087a25.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: htt

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:47,643 SpawnPoolWorker-36 DEBUG    upload finished in 1.09248s, attributes: file_id=c5efc6a2bc9b
2025-05-26 13:05:47,643 SpawnPoolWorker-36 DEBUG    upload finished in 1.092993s, attributes: file_id=c5efc6a2bc9b
upload:   8%|▊         | 89/1056 [00:12<01:23, 11.56it/s]2025-05-26 13:05:47,645 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ea861701a34e.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:47,721 SpawnPoolWorker-40 DEBUG    upload finished in 0.81243s, attributes: file_id=d16c9bc85580
2025-05-26 13:05:47,722 SpawnPoolWorker-40 DEBUG    upload finished in 0.812984s, attributes: file_id=d16c9bc85580
2025-05-26 13:05:47,723 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/53f7f44bad0e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a D

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:47,919 SpawnPoolWorker-35 DEBUG    upload finished in 0.739118s, attributes: file_id=8f8cde39f03d
2025-05-26 13:05:47,919 SpawnPoolWorker-35 DEBUG    upload finished in 0.739626s, attributes: file_id=8f8cde39f03d
upload:   9%|▊         | 91/1056 [00:12<01:39,  9.73it/s]2025-05-26 13:05:47,921 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c806cf1621a5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:48,034 SpawnPoolWorker-38 DEBUG    upload finished in 0.736993s, attributes: file_id=dec2b0e72dc8
2025-05-26 13:05:48,034 SpawnPoolWorker-38 DEBUG    upload finished in 0.737605s, attributes: file_id=dec2b0e72dc8
2025-05-26 13:05:48,036 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:48,124 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/23c8706a3644.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:48,202 SpawnPoolWorker-37 DEBUG    upload finished in 0.676301s, attributes: file_id=fd4922686b20
2025-05-26 13:05:48,203 SpawnPoolWorker-37 DEBUG    upload finished in 0.676871s, attributes: file_id=fd4922686b20
2025-05-26 13:05:48,204 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e65d37e7ae7f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:48,234 SpawnPoolWorker-36 DEBUG    upload finished in 0.589077s, attributes: file_id=ea861701a34e
2025-05-26 13:05:48,234 SpawnPo

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:48,389 SpawnPoolWorker-41 DEBUG    upload finished in 0.810652s, attributes: file_id=8a3d2df77468
2025-05-26 13:05:48,389 SpawnPoolWorker-41 DEBUG    upload finished in 0.811041s, attributes: file_id=8a3d2df77468
upload:   9%|▉         | 97/1056 [00:13<01:21, 11.79it/s]2025-05-26 13:05:48,390 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/10bb83350399.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:48,492 SpawnPoolWorker-40 DEBUG    upload finished in 0.769435s, attributes: file_id=53f7f44bad0e
2025-05-26 13:05:48,493 Spawn

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:48,666 SpawnPoolWorker-35 DEBUG    upload finished in 0.745212s, attributes: file_id=c806cf1621a5
2025-05-26 13:05:48,666 SpawnPoolWorker-35 DEBUG    upload finished in 0.745761s, attributes: file_id=c806cf1621a5
upload:   9%|▉         | 99/1056 [00:13<01:36,  9.89it/s]2025-05-26 13:05:48,667 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e1c43f6baf6f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:48,748 SpawnPoolWorker-39 DEBUG    upload finished in 0.623492s, attributes: file_id=23c8706a3644
2025-05-26 13:05:48,748 SpawnPoolWorker-39 DEBUG    upload finished in 0.624046s, attributes: file_id=23c8706a3644
2025-05-26 13:05:48,749 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:48,904 SpawnPoolWorker-36 DEBUG    upload finished in 0.668895s, attributes: file_id=07ee2bc6d756
2025-05-26 13:05:48,904 SpawnPoolWorker-36 DEBUG    upload finished in 0.669327s, attributes: file_id=07ee2bc6d756
2025-05-26 13:05:48,905 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7608808cbeca.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:48,957 SpawnPoolWorker-37 DEBUG    upload finished in 0.753916s, attributes: file_id=e65d37e7ae7f
2025-05-26 13:05:48,958 SpawnPoolWorker-37 DEBUG    upload finished in 0.754352s, attributes: file_id=e65d37e7ae7f
upload:  10%|▉         |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:49,139 SpawnPoolWorker-41 DEBUG    upload finished in 0.748795s, attributes: file_id=10bb83350399
2025-05-26 13:05:49,139 SpawnPoolWorker-41 DEBUG    upload finished in 0.74924s, attributes: file_id=10bb83350399
upload:  10%|▉         | 105/1056 [00:14<01:23, 11.37it/s]2025-05-26 13:05:49,140 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ed5ee75b38fe.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:49,192 SpawnPoolWorker-40 DEBUG    upload finished in 0.6982s, attributes: file_id=745f799a9fd8
2025-05-26 13:05:49,192 SpawnPoolWorker-40 DEBUG    upload finished in 0.69869s, attributes: file_id=745f799a9fd8
2025-05-26 13:05:49,194

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:49,354 SpawnPoolWorker-35 DEBUG    upload finished in 0.686809s, attributes: file_id=e1c43f6baf6f
2025-05-26 13:05:49,354 SpawnPoolWorker-35 DEBUG    upload finished in 0.687317s, attributes: file_id=e1c43f6baf6f
upload:  10%|█         | 107/1056 [00:14<01:29, 10.65it/s]2025-05-26 13:05:49,356 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fde93cca85bb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:49,435 SpawnPoolWorker-38 DEBUG    upload finished in 0.625331s, attributes: file_id=2f6fd09ec93c
2025-05-26 13:05:49,435 SpawnPoolWorker-38 DEBUG    upload finished in 0.625852s, attributes: file_id=2f6fd09ec93c
2025-05-26 13:05:49,437 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:49,628 SpawnPoolWorker-37 DEBUG    upload finished in 0.669338s, attributes: file_id=32044c2f973f
2025-05-26 13:05:49,629 SpawnPoolWorker-37 DEBUG    upload finished in 0.669814s, attributes: file_id=32044c2f973f
2025-05-26 13:05:49,630 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c5f9cb0738d3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:49,717 SpawnPoolWorker-36 DEBUG    upload finished in 0.811965s, attributes: file_id=7608808cbeca
2025-05-26 13:05:49,717 SpawnPoolWorker-36 DEBUG    upload finished in 0.812431s, attributes: file_id=7608808cbeca
upload:  11%|█         | 111/1056 [00:14<01:27, 10.85it/s]2025-05-26 13:05:49

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:49,904 SpawnPoolWorker-40 DEBUG    upload finished in 0.710731s, attributes: file_id=83b4cea47cdc
2025-05-26 13:05:49,905 SpawnPoolWorker-40 DEBUG    upload finished in 0.711231s, attributes: file_id=83b4cea47cdc
upload:  11%|█         | 113/1056 [00:14<01:27, 10.80it/s]2025-05-26 13:05:49,906 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/aa9263afe9c4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:49,977 SpawnPoolWorker-41 DEBUG    upload finished in 0.837173s, attributes: file_id=ed5ee75b38fe
2025-05-26 13:05:49,978 SpawnPoolWorker-41 DEBUG    upload finished in 0.837633s, attributes: file_id=ed5ee75b38fe
2025-05-26 13:05:49

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:50,131 SpawnPoolWorker-38 DEBUG    upload finished in 0.694158s, attributes: file_id=11d489a7c414
2025-05-26 13:05:50,131 SpawnPoolWorker-38 DEBUG    upload finished in 0.694639s, attributes: file_id=11d489a7c414
2025-05-26 13:05:50,132 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ce37a53a37be.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:50,241 SpawnPoolWorker-39 DEBUG    upload finished in 0.701172s, attributes: file_id=db584424487a
2025-05-26 13:05:50,242 SpawnPoolWorker-39 DEBUG    upload finished in 0.701667s, attributes: file_id=db584424487a
upload:  11%|█         |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:50,380 SpawnPoolWorker-36 DEBUG    upload finished in 0.661402s, attributes: file_id=3272a56d2f99
2025-05-26 13:05:50,380 SpawnPoolWorker-36 DEBUG    upload finished in 0.661854s, attributes: file_id=3272a56d2f99
upload:  11%|█▏        | 119/1056 [00:15<01:17, 12.08it/s]2025-05-26 13:05:50,382 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/329931153dcb.json not detected as batch file data
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:50,503 SpawnPoolWorker-34 DEBUG    upload finished in 0.680448s, attributes: file_id=dd1aeb01e45a
2025-05-26 13:05:50,503 SpawnPoolWorker-34 DEBUG    upload finished in 0.680893s, attributes: file_id=dd1aeb01e45a
2025-05-26 13:05:50,504 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9c3e021d8d56.json not detected as batch file data
A

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:50,694 SpawnPoolWorker-35 DEBUG    upload finished in 0.623215s, attributes: file_id=d6ae8be8815d
2025-05-26 13:05:50,694 SpawnPoolWorker-35 DEBUG    upload finished in 0.623691s, attributes: file_id=d6ae8be8815d
2025-05-26 13:05:50,695 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/03aa67782f3f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:50,903 SpawnPoolWorker-41 DEBUG    upload finished in 0.924477s, attributes: file_id=27a344a4af8d
2025-05-26 13:05:50,903 SpawnPoolWorker-41 DEBUG    upload finished in 0.925019s, attributes: file_id=27a344a4af8d
upload:  12%|█▏        | 123/1056 [00:15<01:43,  9.04it/s]2025-05-26 13:05:50,905 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fe4192da8028.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:50,972 SpawnPoolWorker-38 DEBUG    upload finished in 0.839401s, attributes: file_id=ce37a53a37be
2025-05-26 13:05:50,972 SpawnPoolWorker-38 DEBUG    upload finished in 0.839866s, attributes: file_id=ce37a53a37be
2025-05-26 13:05:50,973 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:51,238 SpawnPoolWorker-39 DEBUG    upload finished in 0.995332s, attributes: file_id=0711defdce28
2025-05-26 13:05:51,238 SpawnPoolWorker-39 DEBUG    upload finished in 0.995744s, attributes: file_id=0711defdce28
2025-05-26 13:05:51,239 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3541636c2840.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:51,319 SpawnPoolWorker-34 DEBUG    upload finished in 0.81484s, attributes: file_id=9c3e021d8d56
2025-05-26 13:05:51,319 SpawnPoolWorker-34 DEBUG    upload finished in 0.815295s, attributes: file_id=9c3e021d8d56
upload:  12%|█▏        | 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:51,472 SpawnPoolWorker-40 DEBUG    upload finished in 0.904489s, attributes: file_id=856c80c3c9a9
2025-05-26 13:05:51,472 SpawnPoolWorker-40 DEBUG    upload finished in 0.904938s, attributes: file_id=856c80c3c9a9
upload:  12%|█▏        | 128/1056 [00:16<01:48,  8.54it/s]2025-05-26 13:05:51,473 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9eb1d54550f3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:51,565 SpawnPoolWorker-35 DEBUG    upload finished in 0.870006s, attributes: file_id=03aa67782f3f
2025-05-26 13:05:51,565 SpawnPoolWorker-35 DEBUG    upload finished in 0.870527s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:51,808 SpawnPoolWorker-41 DEBUG    upload finished in 0.903429s, attributes: file_id=fe4192da8028
2025-05-26 13:05:51,808 SpawnPoolWorker-38 DEBUG    upload finished in 0.835098s, attributes: file_id=f3a329f50e66
2025-05-26 13:05:51,809 SpawnPoolWorker-41 DEBUG    upload finished in 0.904178s, attributes: file_id=fe4192da8028
2025-05-26 13:05:51,809 SpawnPoolWorker-38 DEBUG    upload finished in 0.83576s, attributes: file_id=f3a329f50e66
upload:  12%|█▏        | 130/1056 [00:16<02:04,  7.45it/s]2025-05-26 13:05:51,810 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6149dc3d4e99.json not detected as batch file data
2025-05-26 13:05:51,810 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7637130f1680.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:52,096 SpawnPoolWorker-39 DEBUG    upload finished in 0.856856s, attributes: file_id=3541636c2840
2025-05-26 13:05:52,096 SpawnPoolWorker-39 DEBUG    upload finished in 0.857274s, attributes: file_id=3541636c2840
upload:  13%|█▎        | 135/1056 [00:17<01:21, 11.29it/s]2025-05-26 13:05:52,098 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/14e30baea67f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:52,317 SpawnPoolWorker-40 DEBUG    upload finished in 0.844313s, attributes: file_id=9eb1d54550f3
2025-05-26 13:05:52,318 SpawnPoolWorker-40 DEBUG    upload finished in 0.84486s, attributes: file_id=9eb1d54550f3
2025-05-26 13:05:52,319 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1467c6c4b19a.json not detected as batch file data
2025-05-26 13:05:52,427 SpawnPoolWorker-35 DEBUG    upload finished in 0.860709s, attributes: file_id=82eca9fb928d
2025-05-26 13:05:52,427 SpawnPoolWorker-35 DEBUG    upload finished in 0.861183s, attributes: file_id=82eca9fb928d
upload:  13%|█▎        | 137/1056 [00:17<01:41,  9.07it/s]2025-05-26 13:05:52,429 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c2db2718538f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:52,557 SpawnPoolWorker-41 DEBUG    upload finished in 0.747202s, attributes: file_id=6149dc3d4e99
2025-05-26 13:05:52,558 SpawnPoolWorker-41 DEBUG    upload finished in 0.747677s, attributes: file_id=6149dc3d4e99
2025-05-26 13:05:52,559 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/330818b5becb.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:52,625 SpawnPoolWorker-34 DEBUG    upload finished in 0.639894s, attributes: file_id=f7b814a74ce4
2025-05-26 13:05:52,626 SpawnPoolWorker-34 DEBUG    upload finished in 0.640419s, attributes: file_id=f7b814a74ce4
upload:  13%|█▎        | 139/1056 [00:17<01:38,  9.34it/s]2025-05-26 13:05:52

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:52,831 SpawnPoolWorker-38 DEBUG    upload finished in 1.020325s, attributes: file_id=7637130f1680
2025-05-26 13:05:52,831 SpawnPoolWorker-38 DEBUG    upload finished in 1.020925s, attributes: file_id=7637130f1680
2025-05-26 13:05:52,832 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/adeb7945536c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:52,904 SpawnPoolWorker-39 DEBUG    upload finished in 0.806407s, attributes: file_id=14e30baea67f
upload:  14%|█▎        | 143/1056 [00:17<01:21, 11.15it/s]2025-05-26 13:05:52,904 SpawnPoolWorker-39 DEBUG    upload finished in 0.806828s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:53,102 SpawnPoolWorker-35 DEBUG    upload finished in 0.673344s, attributes: file_id=c2db2718538f
2025-05-26 13:05:53,102 SpawnPoolWorker-35 DEBUG    upload finished in 0.673797s, attributes: file_id=c2db2718538f
upload:  14%|█▎        | 145/1056 [00:18<01:24, 10.82it/s]2025-05-26 13:05:53,103 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e7e389c392c9.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:53,354 SpawnPoolWorker-34 DEBUG    upload finished in 0.727334s, attributes: file_id=308721054446
2025-05-26 13:05:53,355 SpawnPoolWorker-34 DEBUG    upload finished in 0.727902s, attributes: file_id=308721054446
2025-05-26 13:05:53,356 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2de4f6d9d933.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:53,428 SpawnPoolWorker-41 DEBUG    upload finished in 0.86916s, attributes: file_id=330818b5becb
2025-05-26 13:05:53,428 SpawnPoolWorker-41 DEBUG    upload finished in 0.869759s, attributes: file_id=330818b5becb
upload:  14%|█▍        | 147/1056 [00:18<01:43,  8.82it/s]2025-05-26 13:05:53,430 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:53,594 SpawnPoolWorker-39 DEBUG    upload finished in 0.687711s, attributes: file_id=6a4b87909893
2025-05-26 13:05:53,594 SpawnPoolWorker-39 DEBUG    upload finished in 0.68821s, attributes: file_id=6a4b87909893
upload:  14%|█▍        | 149/1056 [00:18<01:34,  9.59it/s]2025-05-26 13:05:53,596 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b03a2d64b862.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:53,677 SpawnPoolWorker-37 DEBUG    upload finished in 0.985479s, attributes: file_id=76b683657772
2025-05-26 13:05:53,678 SpawnPoolWorker-37 DEBUG    upload finished in 0.985961s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:53,833 SpawnPoolWorker-36 DEBUG    upload finished in 1.096185s, attributes: file_id=5c3bf45d204a
2025-05-26 13:05:53,833 SpawnPoolWorker-36 DEBUG    upload finished in 1.096739s, attributes: file_id=5c3bf45d204a
2025-05-26 13:05:53,835 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/119f4bbcd29b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:53,965 SpawnPoolWorker-35 DEBUG    upload finished in 0.861448s, attributes: file_id=e7e389c392c9
2025-05-26 13:05:53,965 SpawnPoolWorker-35 DEBUG    upload finished in 0.862108s, attributes: file_id=e7e389c392c9
upload:  14%|█▍        |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:54,131 SpawnPoolWorker-34 DEBUG    upload finished in 0.775739s, attributes: file_id=2de4f6d9d933
2025-05-26 13:05:54,132 SpawnPoolWorker-34 DEBUG    upload finished in 0.776317s, attributes: file_id=2de4f6d9d933
2025-05-26 13:05:54,134 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6c622251cd19.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:54,237 SpawnPoolWorker-41 DEBUG    upload finished in 0.807455s, attributes: file_id=a19a2c108d95
2025-05-26 13:05:54,238 SpawnPoolWorker-41 DEBUG    upload finished in 0.807929s, attributes: file_id=a19a2c108d95
upload:  15%|█▍        | 155/1056 [00:19<01:39,  9.06it/s]2025-05-26 13:05:54,239 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/94eaa8278615.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: ht

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:54,499 SpawnPoolWorker-39 DEBUG    upload finished in 0.903446s, attributes: file_id=b03a2d64b862
2025-05-26 13:05:54,499 SpawnPoolWorker-39 DEBUG    upload finished in 0.903874s, attributes: file_id=b03a2d64b862
upload:  15%|█▍        | 157/1056 [00:19<01:44,  8.58it/s]2025-05-26 13:05:54,500 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f7bd7916457f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:54,574 SpawnPoolWorker-38 DEBUG    upload finished in 0.814944s, attributes: file_id=fc6ab532f35a
2025-05-26 13:05:54,574 SpawnPoolWorker-38 DEBUG    upload finished in 0.815467s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:54,702 SpawnPoolWorker-36 DEBUG    upload finished in 0.867908s, attributes: file_id=119f4bbcd29b
2025-05-26 13:05:54,703 SpawnPoolWorker-36 DEBUG    upload finished in 0.868454s, attributes: file_id=119f4bbcd29b
2025-05-26 13:05:54,704 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/045ae2e4e9b5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:54,818 SpawnPoolWorker-35 DEBUG    upload finished in 0.851072s, attributes: file_id=33342948658a
2025-05-26 13:05:54,818 SpawnPoolWorker-35 DEBUG    upload finished in 0.851611s, attributes: file_id=33342948658a
upload:  15%|█▌        |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:55,019 SpawnPoolWorker-41 DEBUG    upload finished in 0.780813s, attributes: file_id=94eaa8278615
2025-05-26 13:05:55,020 SpawnPoolWorker-41 DEBUG    upload finished in 0.781216s, attributes: file_id=94eaa8278615
upload:  15%|█▌        | 163/1056 [00:20<01:29, 10.01it/s]2025-05-26 13:05:55,021 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ebe699f66539.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:55,090 SpawnPoolWorker-40 DEBUG    upload finished in 0.772792s, attributes: file_id=929e2316c7a2
2025-05-26 13:05:55,090 SpawnPoolWorker-40 DEBUG    upload finished in 0.773317s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:55,250 SpawnPoolWorker-38 DEBUG    upload finished in 0.674926s, attributes: file_id=ac9dd2c49c7f
2025-05-26 13:05:55,250 SpawnPoolWorker-38 DEBUG    upload finished in 0.675409s, attributes: file_id=ac9dd2c49c7f
2025-05-26 13:05:55,252 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d2943db3c624.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:55,321 SpawnPoolWorker-37 DEBUG    upload finished in 0.699285s, attributes: file_id=91d41bbb907d
2025-05-26 13:05:55,321 SpawnPoolWorker-37 DEBUG    upload finished in 0.699758s, attributes: file_id=91d41bbb907d
upload:  16%|█▌        | 167/1056 [00:20<01:18, 11.38it/s]2025-05-26 13:05:55

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:55,504 SpawnPoolWorker-35 DEBUG    upload finished in 0.684645s, attributes: file_id=04841368ecad
2025-05-26 13:05:55,504 SpawnPoolWorker-35 DEBUG    upload finished in 0.685077s, attributes: file_id=04841368ecad
upload:  16%|█▌        | 169/1056 [00:20<01:18, 11.24it/s]2025-05-26 13:05:55,505 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d0a9f40960e0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:55,604 SpawnPoolWorker-34 DEBUG    upload finished in 0.720232s, attributes: file_id=9d45173a4cc3
2025-05-26 13:05:55,605 SpawnPoolWorker-34 DEBUG    upload finished in 0.720729s, attributes: file_id=9d45173a4cc3
2025-05-26 13:05:55

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:55,785 SpawnPoolWorker-40 DEBUG    upload finished in 0.693038s, attributes: file_id=caec606f0be3
2025-05-26 13:05:55,785 SpawnPoolWorker-40 DEBUG    upload finished in 0.6935s, attributes: file_id=caec606f0be3
2025-05-26 13:05:55,786 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6f0be6e0e2b6.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:55,868 SpawnPoolWorker-39 DEBUG    upload finished in 0.705784s, attributes: file_id=6c8af39ec793
2025-05-26 13:05:55,868 SpawnPoolWorker-39 DEBUG    upload finished in 0.706247s, attributes: file_id=6c8af39ec793
upload:  16%|█▋        | 173/1056 [00:20<01:18, 11.19it/s]2025-05-26 13:05:55,869 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5389834ea3d8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:56,025 SpawnPoolWorker-37 DEBUG    upload finished in 0.70246s, attributes: file_id=fc020f401a58
2025-05-26 13:05:56,025 SpawnPoolWorker-37 DEBUG    upload finished in 0.702912s, attributes: file_id=fc020f401a58
upload:  17%|█▋        | 175/1056 [00:21<01:15, 11.61it/s]2025-05-26 13:05:56,026 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/df8c6b99693b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:56,125 SpawnPoolWorker-36 DEBUG    upload finished in 0.707545s, attributes: file_id=385595b0fe57
2025-05-26 13:05:56,126 SpawnPoolWorker-36 DEBUG    upload finished in 0.708017s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:56,274 SpawnPoolWorker-35 DEBUG    upload finished in 0.768492s, attributes: file_id=d0a9f40960e0
2025-05-26 13:05:56,274 SpawnPoolWorker-35 DEBUG    upload finished in 0.768905s, attributes: file_id=d0a9f40960e0
upload:  17%|█▋        | 177/1056 [00:21<01:25, 10.25it/s]2025-05-26 13:05:56,275 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ae753618b6ee.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:56,316 SpawnPoolWorker-41 DEBUG    upload finished in 0.615454s, attributes: file_id=f80e6ae7af95
2025-05-26 13:05:56,317 SpawnPoolWorker-41 DEBUG    upload finished in 0.616119s, attributes: file_id=f80e6ae7af95
2025-05-26 13:05:56,318 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:56,622 SpawnPoolWorker-39 DEBUG    upload finished in 0.752389s, attributes: file_id=5389834ea3d8
2025-05-26 13:05:56,622 SpawnPoolWorker-39 DEBUG    upload finished in 0.752949s, attributes: file_id=5389834ea3d8
upload:  17%|█▋        | 181/1056 [00:21<01:22, 10.58it/s]2025-05-26 13:05:56,624 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e1c51cdd77cf.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:56,678 SpawnPoolWorker-36 DEBUG    upload finished in 0.551576s, attributes: file_id=3de0d0385f2f
2025-05-26 13:05:56,679 SpawnPoolWorker-36 DEBUG    upload finished in 0.55209s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:56,858 SpawnPoolWorker-37 DEBUG    upload finished in 0.831581s, attributes: file_id=df8c6b99693b
2025-05-26 13:05:56,858 SpawnPoolWorker-37 DEBUG    upload finished in 0.832072s, attributes: file_id=df8c6b99693b
2025-05-26 13:05:56,859 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a05d45c19bf7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:57,021 SpawnPoolWorker-35 DEBUG    upload finished in 0.746234s, attributes: file_id=ae753618b6ee
2025-05-26 13:05:57,022 SpawnPoolWorker-35 DEBUG    upload finished in 0.746694s, attributes: file_id=ae753618b6ee
upload:  18%|█▊        |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:57,088 SpawnPoolWorker-34 DEBUG    upload finished in 0.688806s, attributes: file_id=1d0b4d7c4fb9
2025-05-26 13:05:57,088 SpawnPoolWorker-34 DEBUG    upload finished in 0.689389s, attributes: file_id=1d0b4d7c4fb9
2025-05-26 13:05:57,089 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3f45bf371b36.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:57,171 SpawnPoolWorker-40 DEBUG    upload finished in 0.702577s, attributes: file_id=1f8237ed58cd
2025-05-26 13:05:57,171 SpawnPoolWorker-40 DEBUG    upload finished in 0.703129s, attributes: file_id=1f8237ed58cd
upload:  18%|█▊        | 187/1056 [00:22<01:20, 10.86it/s]2025-05-26 13:05:57

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:05:57,325 SpawnPoolWorker-41 DEBUG    upload finished in 1.007697s, attributes: file_id=bc9c6776976a
2025-05-26 13:05:57,326 SpawnPoolWorker-41 DEBUG    upload finished in 1.008128s, attributes: file_id=bc9c6776976a
2025-05-26 13:05:57,327 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d62cfdc7f56b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:57,349 SpawnPoolWorker-36 DEBUG    upload finished in 0.668756s, attributes: file_id=46399c8ea63f
2025-05-26 13:05:57,349 SpawnPoolWorker-36 DEBUG    upload finished in 0.669159s, attributes: file_id=46399c8ea63f
upload:  18%|█▊        | 189/1056 [00:22<01:18, 10.98it/s]2025-05-26 13:05:57

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:57,533 SpawnPoolWorker-37 DEBUG    upload finished in 0.673334s, attributes: file_id=a05d45c19bf7
2025-05-26 13:05:57,533 SpawnPoolWorker-37 DEBUG    upload finished in 0.673902s, attributes: file_id=a05d45c19bf7
upload:  18%|█▊        | 191/1056 [00:22<01:19, 10.94it/s]2025-05-26 13:05:57,534 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3ece5d4740e7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:57,626 SpawnPoolWorker-38 DEBUG    upload finished in 0.852254s, attributes: file_id=ec16d98dd292
2025-05-26 13:05:57,626 SpawnPoolWorker-38 DEBUG    upload finished in 0.852817s, attributes: file_id=ec16d98dd292
2025-05-26 13:05:57

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:05:57,794 SpawnPoolWorker-34 DEBUG    upload finished in 0.704956s, attributes: file_id=3f45bf371b36
2025-05-26 13:05:57,794 SpawnPoolWorker-34 DEBUG    upload finished in 0.705355s, attributes: file_id=3f45bf371b36
2025-05-26 13:05:57,795 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f51a53ff5b6b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:57,961 SpawnPoolWorker-40 DEBUG    upload finished in 0.788265s, attributes: file_id=0c9a1bc7c4a4
2025-05-26 13:05:57,961 SpawnPoolWorker-40 DEBUG    upload finished in 0.788802s, attributes: file_id=0c9a1bc7c4a4
upload:  18%|█▊        |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:58,068 SpawnPoolWorker-36 DEBUG    upload finished in 0.718238s, attributes: file_id=a315aa4b28cf
2025-05-26 13:05:58,068 SpawnPoolWorker-36 DEBUG    upload finished in 0.718708s, attributes: file_id=a315aa4b28cf
2025-05-26 13:05:58,070 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/74ee6a3c66bc.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:58,150 SpawnPoolWorker-41 DEBUG    upload finished in 0.823732s, attributes: file_id=d62cfdc7f56b
2025-05-26 13:05:58,151 SpawnPoolWorker-41 DEBUG    upload finished in 0.824136s, attributes: file_id=d62cfdc7f56b
upload:  19%|█▊        | 197/1056 [00:23<01:25,  9.99it/s]2025-05-26 13:05:58

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:58,330 SpawnPoolWorker-39 DEBUG    upload finished in 0.886775s, attributes: file_id=0509fb41bcde
2025-05-26 13:05:58,331 SpawnPoolWorker-39 DEBUG    upload finished in 0.887232s, attributes: file_id=0509fb41bcde
2025-05-26 13:05:58,332 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1079fded4c9e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Remov

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:05:58,669 SpawnPoolWorker-34 DEBUG    upload finished in 0.873551s, attributes: file_id=f51a53ff5b6b
2025-05-26 13:05:58,669 SpawnPoolWorker-34 DEBUG    upload finished in 0.87411s, attributes: file_id=f51a53ff5b6b
2025-05-26 13:05:58,671 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/99ca60e61acb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:58,791 SpawnPoolWorker-40 DEBUG    upload finished in 0.828397s, attributes: file_id=6871de7467d0
2025-05-26 13:05:58,791 SpawnPoolWorker-40 DEBUG    upload finished in 0.828899s, attributes: file_id=6871de7467d0
upload:  19%|█▉        | 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:58,924 SpawnPoolWorker-36 DEBUG    upload finished in 0.85452s, attributes: file_id=74ee6a3c66bc
2025-05-26 13:05:58,924 SpawnPoolWorker-36 DEBUG    upload finished in 0.855008s, attributes: file_id=74ee6a3c66bc
2025-05-26 13:05:58,925 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6f46caf92c79.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:59,103 SpawnPoolWorker-37 DEBUG    upload finished in 0.864459s, attributes: file_id=614a3493e889
2025-05-26 13:05:59,103 SpawnPoolWorker-37 DEBUG    upload finished in 0.864826s, attributes: file_id=614a3493e889
upload:  19%|█▉        | 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:05:59,217 SpawnPoolWorker-41 DEBUG    upload finished in 1.065827s, attributes: file_id=0890673d2452
2025-05-26 13:05:59,218 SpawnPoolWorker-41 DEBUG    upload finished in 1.06633s, attributes: file_id=0890673d2452
upload:  20%|█▉        | 206/1056 [00:24<01:45,  8.04it/s]2025-05-26 13:05:59,219 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e3aac8436350.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:05:59,294 SpawnPoolWorker-35 DEBUG    upload finished in 1.026478s, attributes: file_id=4a0f134d4f55
2025-05-26 13:05:59,295 SpawnPoolWorker-35 DEBUG    upload finished in 1.026959s, attributes: file_id=4a0f134d4f55
2025-05-26 13:05:59,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:59,450 SpawnPoolWorker-34 DEBUG    upload finished in 0.779539s, attributes: file_id=99ca60e61acb
2025-05-26 13:05:59,451 SpawnPoolWorker-34 DEBUG    upload finished in 0.780135s, attributes: file_id=99ca60e61acb
2025-05-26 13:05:59,452 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d804dac61747.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:59,534 SpawnPoolWorker-40 DEBUG    upload finished in 0.741981s, attributes: file_id=49129c2634b0
2025-05-26 13:05:59,535 SpawnPoolWorker-40 DEBUG    upload finished in 0.742413s, attributes: file_id=49129c2634b0
upload:  20%|█▉        | 210/1056 [00:24<01:24,  9.99it/s]2025-05-26 13:05:59

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:05:59,736 SpawnPoolWorker-36 DEBUG    upload finished in 0.810421s, attributes: file_id=6f46caf92c79
2025-05-26 13:05:59,736 SpawnPoolWorker-36 DEBUG    upload finished in 0.81096s, attributes: file_id=6f46caf92c79
upload:  20%|██        | 212/1056 [00:24<01:24,  9.97it/s]2025-05-26 13:05:59,737 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ea6f66e6f447.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:05:59,816 SpawnPoolWorker-37 DEBUG    upload finished in 0.711707s, attributes: file_id=3097d61e47c7
2025-05-26 13:05:59,816 SpawnPoolWorker-37 DEBUG    upload finished in 0.712213s, attributes: file_id=3097d61e47c7
2025-05-26 13:05:59,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:05:59,969 SpawnPoolWorker-41 DEBUG    upload finished in 0.749996s, attributes: file_id=e3aac8436350
2025-05-26 13:05:59,969 SpawnPoolWorker-41 DEBUG    upload finished in 0.750456s, attributes: file_id=e3aac8436350
upload:  20%|██        | 214/1056 [00:24<01:28,  9.49it/s]2025-05-26 13:05:59,970 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1c2ad83797c7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:00,025 SpawnPoolWorker-35 DEBUG    upload finished in 0.729184s, attributes: file_id=30a089f2f6db
2025-05-26 13:06:00,025 SpawnPoolWorker-35 DEBUG    upload finished in 0.729654s, attributes: file_id=30a089f2f6db
2025-05-26 13:06:00,026 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:00,233 SpawnPoolWorker-40 DEBUG    upload finished in 0.697218s, attributes: file_id=2cb11f86858b
2025-05-26 13:06:00,233 SpawnPoolWorker-40 DEBUG    upload finished in 0.697686s, attributes: file_id=2cb11f86858b
2025-05-26 13:06:00,235 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9306cc371888.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:00,338 SpawnPoolWorker-34 DEBUG    upload finished in 0.885791s, attributes: file_id=d804dac61747
2025-05-26 13:06:00,338 SpawnPoolWorker-34 DEBUG    upload finished in 0.886341s, attributes: file_id=d804dac61747
upload:  21%|██        | 218/1056 [00:25<01:22, 10.21it/s]2025-05-26 13:06:00

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:00,504 SpawnPoolWorker-36 DEBUG    upload finished in 0.766781s, attributes: file_id=ea6f66e6f447
2025-05-26 13:06:00,504 SpawnPoolWorker-36 DEBUG    upload finished in 0.767263s, attributes: file_id=ea6f66e6f447
upload:  21%|██        | 220/1056 [00:25<01:18, 10.71it/s]2025-05-26 13:06:00,505 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ff0809cc63c6.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:00,554 SpawnPoolWorker-37 DEBUG    upload finished in 0.736423s, attributes: file_id=d57f5c9281e2
2025-05-26 13:06:00,554 SpawnPoolWorker-37 DEBUG    upload finished in 0.736857s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:00,723 SpawnPoolWorker-41 DEBUG    upload finished in 0.752964s, attributes: file_id=1c2ad83797c7
2025-05-26 13:06:00,724 SpawnPoolWorker-41 DEBUG    upload finished in 0.753622s, attributes: file_id=1c2ad83797c7
upload:  21%|██        | 222/1056 [00:25<01:22, 10.17it/s]2025-05-26 13:06:00,726 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f8328c4a5537.json not detected as batch file data
2025-05-26 13:06:00,802 SpawnPoolWorker-35 DEBUG    upload finished in 0.775278s, attributes: file_id=02a070a63f4f
2025-05-26 13:06:00,802 SpawnPoolWorker-35 DEBUG    upload finished in 0.775769s, attributes: file_id=02a070a63f4f
2025-05-26 13:06:00,803 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f30023bed632.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:00,935 SpawnPoolWorker-40 DEBUG    upload finished in 0.701121s, attributes: file_id=9306cc371888
2025-05-26 13:06:00,936 SpawnPoolWorker-40 DEBUG    upload finished in 0.701676s, attributes: file_id=9306cc371888
2025-05-26 13:06:00,938 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/973e7dbc76d0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:01,009 SpawnPoolWorker-34 DEBUG    upload finished in 0.66917s, attributes: file_id=a7443365bd74
2025-05-26 13:06:01,009 SpawnPoolWorker-34 DEBUG    upload finished in 0.669821s, attributes: file_id=a7443365bd74
upload:  21%|██▏       | 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:01,297 SpawnPoolWorker-36 DEBUG    upload finished in 0.791523s, attributes: file_id=ff0809cc63c6
2025-05-26 13:06:01,297 SpawnPoolWorker-36 DEBUG    upload finished in 0.791979s, attributes: file_id=ff0809cc63c6
upload:  22%|██▏       | 228/1056 [00:26<01:25,  9.73it/s]2025-05-26 13:06:01,299 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7e1e41d469b0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-vers

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:01,598 SpawnPoolWorker-35 DEBUG    upload finished in 0.794653s, attributes: file_id=f30023bed632
2025-05-26 13:06:01,598 SpawnPoolWorker-35 DEBUG    upload finished in 0.79522s, attributes: file_id=f30023bed632
upload:  22%|██▏       | 232/1056 [00:26<01:13, 11.18it/s]2025-05-26 13:06:01,600 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/443e4835dac3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:01,667 SpawnPoolWorker-40 DEBUG    upload finished in 0.729446s, attributes: file_id=973e7dbc76d0
2025-05-26 13:06:01,667 Spawn

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:01,894 SpawnPoolWorker-34 DEBUG    upload finished in 0.883506s, attributes: file_id=d17f1da2dccf
2025-05-26 13:06:01,895 SpawnPoolWorker-34 DEBUG    upload finished in 0.884007s, attributes: file_id=d17f1da2dccf
upload:  22%|██▏       | 234/1056 [00:26<01:27,  9.34it/s]2025-05-26 13:06:01,896 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/23dacb5e1e16.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:01,923 SpawnPoolWorker-38 DEBUG    upload finished in 0.794402s, attributes: file_id=b43520d8ecf0
2025-05-26 13:06:01,923 SpawnPoolWorker-38 DEBUG    upload finished in 0.794869s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:02,250 SpawnPoolWorker-36 DEBUG    upload finished in 0.950983s, attributes: file_id=7e1e41d469b0
2025-05-26 13:06:02,250 SpawnPoolWorker-36 DEBUG    upload finished in 0.951914s, attributes: file_id=7e1e41d469b0
upload:  22%|██▏       | 236/1056 [00:27<01:45,  7.80it/s]2025-05-26 13:06:02,253 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9dc94f9de504.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:02,374 SpawnPoolWorker-39 DEBUG    upload finished in 0.89806s, attributes: file_id=15d065614483
2025-05-26 13:06:02,374 SpawnPoolWorker-39 DEBUG    upload finished in 0.898537s, attributes: file_id=15d065614483
upload:  22%|██▏       | 237/1056 [00:27<01:44,  7.84it/s]2025-05-26 13:0

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:02,461 SpawnPoolWorker-40 DEBUG    upload finished in 0.79253s, attributes: file_id=6914678728f9
2025-05-26 13:06:02,461 SpawnPoolWorker-40 DEBUG    upload finished in 0.793007s, attributes: file_id=6914678728f9
2025-05-26 13:06:02,463 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5bfbf55db27d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:02,603 SpawnPoolWorker-38 DEBUG    upload finished in 0.67804s, attributes: file_id=b29e6bad3f43
2025-05-26 13:06:02,603 SpawnPoolWorker-38 DEBUG    upload finished in 0.678523s, attributes: file_id=b29e6bad3f43
upload:  23%|██▎       | 2

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:02,792 SpawnPoolWorker-41 DEBUG    upload finished in 1.358109s, attributes: file_id=4ef2adf4fe8a
2025-05-26 13:06:02,792 SpawnPoolWorker-41 DEBUG    upload finished in 1.358728s, attributes: file_id=4ef2adf4fe8a
upload:  23%|██▎       | 240/1056 [00:27<01:50,  7.36it/s]2025-05-26 13:06:02,794 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d542a4ad8286.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:02,988 SpawnPoolWorker-35 DEBUG    upload finished in 1.388547s, attributes: file_id=443e4835dac3
2025-05-26 13:06:02,988 SpawnPoolWorker-35 DEBUG    upload finished in 1.389039s, attributes: file_id=443e4835dac3
upload:  23%|██▎   

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:03,170 SpawnPoolWorker-36 DEBUG    upload finished in 0.917835s, attributes: file_id=9dc94f9de504
2025-05-26 13:06:03,171 SpawnPoolWorker-36 DEBUG    upload finished in 0.918609s, attributes: file_id=9dc94f9de504
upload:  23%|██▎       | 242/1056 [00:28<02:07,  6.38it/s]2025-05-26 13:06:03,173 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f0fa0ea23c36.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:03,275 SpawnPoolWorker-39 DEBUG    upload finished in 0.899843s, attributes: file_id=ce092251f7bd
2025-05-26 13:06:03,276 SpawnPoolWorker-39 DEBUG    upload finished in 0.900285s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:03,442 SpawnPoolWorker-34 DEBUG    upload finished in 1.546157s, attributes: file_id=23dacb5e1e16
2025-05-26 13:06:03,443 SpawnPoolWorker-34 DEBUG    upload finished in 1.546656s, attributes: file_id=23dacb5e1e16
upload:  23%|██▎       | 245/1056 [00:28<01:36,  8.44it/s]2025-05-26 13:06:03,444 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/45cfb3a1cf9f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:03,506 SpawnPoolWorker-38 DEBUG    upload finished in 0.902395s, attributes: file_id=f88eded3b396
2025-05-26 13:06:03,507 SpawnPoolWorker-38 DEBUG    upload finished in 0.902842s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:03,702 SpawnPoolWorker-41 DEBUG    upload finished in 0.908173s, attributes: file_id=d542a4ad8286
2025-05-26 13:06:03,703 SpawnPoolWorker-41 DEBUG    upload finished in 0.908672s, attributes: file_id=d542a4ad8286
2025-05-26 13:06:03,704 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/822d138675c5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:03,788 SpawnPoolWorker-35 DEBUG    upload finished in 0.798652s, attributes: file_id=35989320e8f1
2025-05-26 13:06:03,788 SpawnPoolWorker-35 DEBUG    upload finished in 0.799103s, attributes: file_id=35989320e8f1
upload:  24%|██▎       | 249/1056 [00:28<01:19, 10.13it/s]2025-05-26 13:06:03

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:03,904 SpawnPoolWorker-36 DEBUG    upload finished in 0.731554s, attributes: file_id=f0fa0ea23c36
2025-05-26 13:06:03,905 SpawnPoolWorker-36 DEBUG    upload finished in 0.732213s, attributes: file_id=f0fa0ea23c36
2025-05-26 13:06:03,906 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/28d605eac0ea.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:04,023 SpawnPoolWorker-39 DEBUG    upload finished in 0.746435s, attributes: file_id=d7189a7100da
2025-05-26 13:06:04,023 SpawnPoolWorker-39 DEBUG    upload finished in 0.746916s, attributes: file_id=d7189a7100da
upload:  24%|██▍       | 251/1056 [00:29<01:24,  9.53it/s]2025-05-26 13:06:04

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:04,163 SpawnPoolWorker-37 DEBUG    upload finished in 0.792576s, attributes: file_id=b67f9ade92d6
2025-05-26 13:06:04,164 SpawnPoolWorker-37 DEBUG    upload finished in 0.794326s, attributes: file_id=b67f9ade92d6
Removed trailing semicolon and whitespace from query
upload:  24%|██▍       | 252/1056 [00:29<01:31,  8.83it/s]2025-05-26 13:06:04,173 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/23496335b411.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:04,266 SpawnPoolWorker-34 DEBUG    upload finished in 0.822765s, attributes: file_id=45cfb3a1cf9f
2025-05-26 13:06:04,267 SpawnPoolWorker-34 DEBUG    upload finished in 0.82321s, attributes: file_id=45cfb3a1cf9f
2025-05-26 13:06:04,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:04,396 SpawnPoolWorker-38 DEBUG    upload finished in 0.887994s, attributes: file_id=15c4eb7ad76e
2025-05-26 13:06:04,396 SpawnPoolWorker-38 DEBUG    upload finished in 0.888435s, attributes: file_id=15c4eb7ad76e
2025-05-26 13:06:04,397 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/be1bf5793001.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:04,523 SpawnPoolWorker-41 DEBUG    upload finished in 0.819379s, attributes: file_id=822d138675c5
2025-05-26 13:06:04,523 SpawnPoolWorker-41 DEBUG    upload finished in 0.819932s, attributes: file_id=822d138675c5
upload:  24%|██▍       | 256/1056 [00:29<01:20,  9.89it/s]2025-05-26 13:06:04,525 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/28dd9e363d25.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: ht

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:04,623 SpawnPoolWorker-35 DEBUG    upload finished in 0.83362s, attributes: file_id=51aa43ff4980
2025-05-26 13:06:04,623 SpawnPoolWorker-35 DEBUG    upload finished in 0.834113s, attributes: file_id=51aa43ff4980
2025-05-26 13:06:04,625 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0457ba9a03fa.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:04,724 SpawnPoolWorker-36 DEBUG    upload finished in 0.817959s, attributes: file_id=28d605eac0ea
2025-05-26 13:06:04,724 SpawnPoolWorker-36 DEBUG    upload finished in 0.818458s, attributes: file_id=28d605eac0ea
upload:  24%|██▍       | 258/1056 [00:29<01:20,  9.91it/s]2025-05-26 13:06:04,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:04,956 SpawnPoolWorker-37 DEBUG    upload finished in 0.783136s, attributes: file_id=23496335b411
2025-05-26 13:06:04,956 SpawnPoolWorker-37 DEBUG    upload finished in 0.783651s, attributes: file_id=23496335b411
upload:  25%|██▍       | 260/1056 [00:29<01:24,  9.46it/s]2025-05-26 13:06:04,958 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/589e49fa4187.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:05,022 SpawnPoolWorker-38 DEBUG    upload finished in 0.624876s, attributes: file_id=be1bf5793001
2025-05-26 13:06:05,022 SpawnPoolWorker-38 DEBUG    upload finished in 0.625297s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:05,173 SpawnPoolWorker-41 DEBUG    upload finished in 0.648226s, attributes: file_id=28dd9e363d25
2025-05-26 13:06:05,173 SpawnPoolWorker-41 DEBUG    upload finished in 0.648725s, attributes: file_id=28dd9e363d25
2025-05-26 13:06:05,175 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/189e4ca4b21e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:05,288 SpawnPoolWorker-34 DEBUG    upload finished in 1.020558s, attributes: file_id=298ff4a3fa4a
2025-05-26 13:06:05,289 SpawnPoolWorker-34 DEBUG    upload finished in 1.021104s, attributes: file_id=298ff4a3fa4a
upload:  25%|██▌       | 264/1056 [00:30<01:14, 10.63it/s]2025-05-26 13:06:05

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:05,440 SpawnPoolWorker-35 DEBUG    upload finished in 0.815699s, attributes: file_id=0457ba9a03fa
2025-05-26 13:06:05,441 SpawnPoolWorker-35 DEBUG    upload finished in 0.816124s, attributes: file_id=0457ba9a03fa
upload:  25%|██▌       | 266/1056 [00:30<01:09, 11.30it/s]2025-05-26 13:06:05,442 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0cd01b114165.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:05,568 SpawnPoolWorker-39 DEBUG    upload finished in 0.756002s, attributes: file_id=be23e7cc90d5
2025-05-26 13:06:05,568 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:05,667 SpawnPoolWorker-37 DEBUG    upload finished in 0.708956s, attributes: file_id=589e49fa4187
2025-05-26 13:06:05,667 SpawnPoolWorker-37 DEBUG    upload finished in 0.709533s, attributes: file_id=589e49fa4187
upload:  25%|██▌       | 268/1056 [00:30<01:15, 10.42it/s]2025-05-26 13:06:05,668 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d1766b31024b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:05,736 SpawnPoolWorker-38 DEBUG    upload finished in 0.712885s, attributes: file_id=976e9c01e2f2
2025-05-26 13:06:05,736 SpawnPoolWorker-38 DEBUG    upload finished in 0.713302s, attributes: file_id=976e9c01e2f2
2025-05-26 13:06:05

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:05,918 SpawnPoolWorker-41 DEBUG    upload finished in 0.743331s, attributes: file_id=189e4ca4b21e
2025-05-26 13:06:05,918 SpawnPoolWorker-41 DEBUG    upload finished in 0.743798s, attributes: file_id=189e4ca4b21e
2025-05-26 13:06:05,919 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f5fb00a5b06a.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:06,013 SpawnPoolWorker-34 DEBUG    upload finished in 0.72316s, attributes: file_id=185df24eebb3
2025-05-26 13:06:06,014 SpawnPoolWorker-34 DEBUG    upload finished in 0.723715s, attributes: file_id=185df24eebb3
upload:  26%|██▌       | 272/1056 [00:30<01:11, 10.94it/s]2025-05-26 13:06:06,015 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b1d7e7b02f90.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: htt

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:06,196 SpawnPoolWorker-35 DEBUG    upload finished in 0.754661s, attributes: file_id=0cd01b114165
2025-05-26 13:06:06,197 SpawnPoolWorker-35 DEBUG    upload finished in 0.75513s, attributes: file_id=0cd01b114165
upload:  26%|██▌       | 274/1056 [00:31<01:11, 10.94it/s]2025-05-26 13:06:06,198 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/191922a049c6.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:06,340 SpawnPoolWorker-39 DEBUG    upload finished in 0.769856s, attributes: file_id=14c112a4e0bd
2025-05-26 13:06:06,340 Spawn

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:06,404 SpawnPoolWorker-37 DEBUG    upload finished in 0.736513s, attributes: file_id=d1766b31024b
2025-05-26 13:06:06,405 SpawnPoolWorker-37 DEBUG    upload finished in 0.737068s, attributes: file_id=d1766b31024b
upload:  26%|██▌       | 276/1056 [00:31<01:14, 10.50it/s]2025-05-26 13:06:06,406 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7e55dc6f0b98.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:06,516 SpawnPoolWorker-38 DEBUG    upload finished in 0.779085s, attributes: file_id=e18a80aa4ddc
2025-05-26 13:06:06,517 SpawnPoolWorker-38 DEBUG    upload finished in 0.779567s, attributes: file_id=e18a80aa4ddc
2025-05-26 13:06:06

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:06,645 SpawnPoolWorker-40 DEBUG    upload finished in 0.807213s, attributes: file_id=6e53f27ebed8
2025-05-26 13:06:06,645 SpawnPoolWorker-40 DEBUG    upload finished in 0.807666s, attributes: file_id=6e53f27ebed8
2025-05-26 13:06:06,646 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/10f139ce6eea.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:06,760 SpawnPoolWorker-34 DEBUG    upload finished in 0.745802s, attributes: file_id=b1d7e7b02f90
2025-05-26 13:06:06,761 SpawnPoolWorker-34 DEBUG    upload finished in 0.746199s, attr

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:06,857 SpawnPoolWorker-36 DEBUG    upload finished in 0.760354s, attributes: file_id=b178bb09e567
2025-05-26 13:06:06,857 SpawnPoolWorker-36 DEBUG    upload finished in 0.760813s, attributes: file_id=b178bb09e567
2025-05-26 13:06:06,858 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/68de3c3e3a7a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:06,927 SpawnPoolWorker-35 DEBUG    upload finished in 0.729642s, attributes: file_id=191922a049c6
2025-05-26 13:06:06,928 SpawnPoolWorker-35 DEBUG    upload finished in 0.730143s, attributes: file_id=191922a049c6
upload:  27%|██▋       | 282/1056 [00:31<01:10, 11.03it/s]2025-05-26 13:06:06

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:07,096 SpawnPoolWorker-37 DEBUG    upload finished in 0.690084s, attributes: file_id=7e55dc6f0b98
2025-05-26 13:06:07,097 SpawnPoolWorker-37 DEBUG    upload finished in 0.690541s, attributes: file_id=7e55dc6f0b98
upload:  27%|██▋       | 284/1056 [00:32<01:08, 11.26it/s]2025-05-26 13:06:07,098 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ef0196ce8e0c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:07,237 SpawnPoolWorker-38 DEBUG    upload finished in 0.719308s, attributes: file_id=5a06bbd3fe6b
2025-05-26 13:06:07,238 SpawnPoolWorker-38 DEBUG    upload finished in 0.719802s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:07,345 SpawnPoolWorker-41 DEBUG    upload finished in 0.798492s, attributes: file_id=dec7a0c6c158
2025-05-26 13:06:07,345 SpawnPoolWorker-41 DEBUG    upload finished in 0.798966s, attributes: file_id=dec7a0c6c158
upload:  27%|██▋       | 286/1056 [00:32<01:16, 10.06it/s]2025-05-26 13:06:07,346 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e54f8f3aef9e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:07,393 SpawnPoolWorker-40 DEBUG    upload finished in 0.746804s, attributes: file_id=10f139ce6eea
2025-05-26 13:06:07,393 SpawnPoolWorker-40 DEBUG    upload finished in 0.747278s, attributes: file_id=10f139ce6eea
2025-05-26 13:06:07

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:07,644 SpawnPoolWorker-36 DEBUG    upload finished in 0.785353s, attributes: file_id=68de3c3e3a7a
2025-05-26 13:06:07,644 SpawnPoolWorker-36 DEBUG    upload finished in 0.785881s, attributes: file_id=68de3c3e3a7a
2025-05-26 13:06:07,645 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e888e7dc1ef3.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:07,718 SpawnPoolWorker-35 DEBUG    upload finished in 0.789247s, attributes: file_id=95817f4b20ed
2025-05-26 13:06:07,719 SpawnPoolWorker-35 DEBUG    upload finished in 0.789856s, attributes: file_id=95817f4b20ed
upload:  27%|██▋       | 290/1056 [00:32<01:14, 10.28it/s]2025-05-26 13:06:07,720 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6bd3a61a9eef.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: ht

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:07,878 SpawnPoolWorker-39 DEBUG    upload finished in 0.849893s, attributes: file_id=200ca258df7b
2025-05-26 13:06:07,879 SpawnPoolWorker-39 DEBUG    upload finished in 0.850405s, attributes: file_id=200ca258df7b
upload:  28%|██▊       | 292/1056 [00:32<01:10, 10.86it/s]2025-05-26 13:06:07,880 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1678194517d2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:08,070 SpawnPoolWorker-38 DEBUG    upload finished in 0.831375s, attributes: file_id=086e76fff2bc
2025-05-26 13:06:08,070 SpawnPoolWorker-38 DEBUG    upload finished in 0.831797s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:08,152 SpawnPoolWorker-40 DEBUG    upload finished in 0.757642s, attributes: file_id=1d8cbbea3e11
2025-05-26 13:06:08,152 SpawnPoolWorker-40 DEBUG    upload finished in 0.758119s, attributes: file_id=1d8cbbea3e11
upload:  28%|██▊       | 294/1056 [00:33<01:20,  9.48it/s]2025-05-26 13:06:08,154 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/51af7ae581b0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:08,225 SpawnPoolWorker-41 DEBUG    upload finished in 0.87853s, attributes: file_id=e54f8f3aef9e
2025-05-26 13:06:08,225 SpawnPoolWorker-41 DEBUG    upload finished in 0.879023s, attributes: file_id=e54f8f3aef9e
2025-05-26 13:06:08,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:08,380 SpawnPoolWorker-34 DEBUG    upload finished in 0.867864s, attributes: file_id=75a1dc88b32d
2025-05-26 13:06:08,380 SpawnPoolWorker-34 DEBUG    upload finished in 0.868389s, attributes: file_id=75a1dc88b32d
2025-05-26 13:06:08,382 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/513729de0771.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:08,510 SpawnPoolWorker-35 DEBUG    upload finished in 0.790167s, attributes: file_id=6bd3a61a9eef
2025-05-26 13:06:08,510 SpawnPoolWorker-35 DEBUG    upload finished in 0.790733s, attributes: file_id=6bd3a61a9eef
upload:  28%|██▊       | 298/1056 [00:33<01:14, 10.12it/s]2025-05-26 13:06:08

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:08,600 SpawnPoolWorker-37 DEBUG    upload finished in 0.82043s, attributes: file_id=af4e8027a422
2025-05-26 13:06:08,600 SpawnPoolWorker-37 DEBUG    upload finished in 0.820852s, attributes: file_id=af4e8027a422
2025-05-26 13:06:08,602 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f8b7a807bf1d.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:08,674 SpawnPoolWorker-39 DEBUG    upload finished in 0.793867s, attributes: file_id=1678194517d2
2025-05-26 13:06:08,674 SpawnPoolWorker-39 DEBUG    upload finished in 0.794314s, attributes: file_id=1678194517d2
upload:  28%|██▊       | 300/1056 [00:33<01:10, 10.67it/s]2025-05-26 13:06:08,675 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d4e0901114a6.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: htt

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:08,829 SpawnPoolWorker-40 DEBUG    upload finished in 0.675448s, attributes: file_id=51af7ae581b0
2025-05-26 13:06:08,829 SpawnPoolWorker-40 DEBUG    upload finished in 0.676008s, attributes: file_id=51af7ae581b0
upload:  29%|██▊       | 302/1056 [00:33<01:07, 11.24it/s]2025-05-26 13:06:08,831 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f40b6481a9cd.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:08,925 SpawnPoolWorker-41 DEBUG    upload finished in 0.698466s, attributes: file_id=23f9d562b57f
2025-05-26 13:06:08,925 SpawnPoolWorker-41 DEBUG    upload finished in 0.698945s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:09,042 SpawnPoolWorker-36 DEBUG    upload finished in 0.742773s, attributes: file_id=44fffbe1164d
2025-05-26 13:06:09,042 SpawnPoolWorker-36 DEBUG    upload finished in 0.743285s, attributes: file_id=44fffbe1164d
upload:  29%|██▉       | 304/1056 [00:34<01:10, 10.61it/s]2025-05-26 13:06:09,044 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fe9b64c90aea.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:09,134 SpawnPoolWorker-34 DEBUG    upload finished in 0.752407s, attributes: file_id=513729de0771
2025-05-26 13:06:09,134 SpawnPoolWorker-34 DEBUG    upload finished in 0.752886s, attributes: file_id=513729de0771
2025-05-26 13:06:09

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:09,245 SpawnPoolWorker-35 DEBUG    upload finished in 0.733419s, attributes: file_id=f63311137bb0
2025-05-26 13:06:09,246 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/adae3e43c755.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:09,421 SpawnPoolWorker-39 DEBUG    upload finished in 0.746288s, attributes: file_id=d4e0901114a6
2025-05-26 13:06:09,421 SpawnPoolWorker-39 DEBUG    upload finished in 0.746727s, attributes: file_id=d4e0901114a6
upload:  29%|██▉       | 308/1056 [00:34<01:11, 10.43it/s]2025-05-26 13:06:09,423 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:09,501 SpawnPoolWorker-38 DEBUG    upload finished in 0.739007s, attributes: file_id=9cd4f3ffcc72
2025-05-26 13:06:09,502 SpawnPoolWorker-38 DEBUG    upload finished in 0.73953s, attributes: file_id=9cd4f3ffcc72
2025-05-26 13:06:09,503 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/82cde931c08d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:09,565 SpawnPoolWorker-40 DEBUG    upload finished in 0.733949s, attributes: file_id=f40b6481a9cd
2025-05-26 13:06:09,565 SpawnPoolWorker-40 DEBUG    upload finished in 0.734403s, attributes: file_id=f40b6481a9cd
upload:  29%|██▉       | 310/1056 [00:34<01:06, 11.29it/s]2025-05-26 13:06:09,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:09,720 SpawnPoolWorker-36 DEBUG    upload finished in 0.676362s, attributes: file_id=fe9b64c90aea
2025-05-26 13:06:09,720 SpawnPoolWorker-36 DEBUG    upload finished in 0.676969s, attributes: file_id=fe9b64c90aea
2025-05-26 13:06:09,722 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8def0e3a9634.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:09,794 SpawnPoolWorker-41 DEBUG    upload finished in 0.868382s, attributes: file_id=3c969fd40c4c
2025-05-26 13:06:09,795 SpawnPoolWorker-41 DEBUG    upload finished in 0.868831s, attributes: file_id=3c969fd40c4c
upload:  30%|██▉       | 312/1056 [00:34<01:11, 10.36it/s]2025-05-26 13:06:09,796 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:09,942 SpawnPoolWorker-37 DEBUG    upload finished in 0.739117s, attributes: file_id=fd669a56c585
2025-05-26 13:06:09,942 SpawnPoolWorker-37 DEBUG    upload finished in 0.739731s, attributes: file_id=fd669a56c585
upload:  30%|██▉       | 314/1056 [00:34<01:06, 11.15it/s]2025-05-26 13:06:09,944 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5c58ec986dce.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:10,027 SpawnPoolWorker-35 DEBUG    upload finished in 0.780873s, attributes: file_id=adae3e43c755
2025-05-26 13:06:10,027 SpawnPoolWorker-35 DEBUG    upload finished in 0.78138s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:10,203 SpawnPoolWorker-38 DEBUG    upload finished in 0.700368s, attributes: file_id=82cde931c08d
2025-05-26 13:06:10,203 SpawnPoolWorker-38 DEBUG    upload finished in 0.700779s, attributes: file_id=82cde931c08d
upload:  30%|███       | 318/1056 [00:35<00:56, 12.99it/s]2025-05-26 13:06:10,205 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2b8326f6f740.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:10,459 SpawnPoolWorker-36 DEBUG    upload finished in 0.737203s, attributes: file_id=8def0e3a9634
2025-05-26 13:06:10,459 SpawnPoolWorker-36 DEBUG    upload finished in 0.73782s, attributes: file_id=8def0e3a9634
2025-05-26 13:06:10,461 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/243aac6fb32a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:10,542 SpawnPoolWorker-34 DEBUG    upload finished in 0.68722s, attributes: file_id=a8b5eb17ad1f
2025-05-26 13:06:10,543 SpawnPoolWorker-34 DEBUG    upload finished in 0.687808s, attributes: file_id=a8b5eb17ad1f
upload:  30%|███       | 320/1056 [00:35<01:17,  9.54it/s]2025-05-26 13:06:10,544 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:10,739 SpawnPoolWorker-41 DEBUG    upload finished in 0.942959s, attributes: file_id=e96a7fb4d747
2025-05-26 13:06:10,739 SpawnPoolWorker-41 DEBUG    upload finished in 0.943644s, attributes: file_id=e96a7fb4d747
2025-05-26 13:06:10,743 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6be3dac8956d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:10,855 SpawnPoolWorker-37 DEBUG    upload finished in 0.911613s, attributes: file_id=5c58ec986dce
2025-05-26 13:06:10,856 SpawnPoolWorker-37 DEBUG    upload finished in 0.912179s, attributes: file_id=5c58ec986dce
upload:  31%|███       |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:11,022 SpawnPoolWorker-38 DEBUG    upload finished in 0.817768s, attributes: file_id=2b8326f6f740
2025-05-26 13:06:11,023 SpawnPoolWorker-38 DEBUG    upload finished in 0.818259s, attributes: file_id=2b8326f6f740
upload:  31%|███       | 326/1056 [00:36<01:06, 11.04it/s]2025-05-26 13:06:11,024 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/21480e4d1631.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:11,221 SpawnPoolWorker-34 DEBUG    upload finished in 0.677448s, attributes: file_id=6ec8076f7474
2025-05-26 13:06:11,222 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:11,296 SpawnPoolWorker-36 DEBUG    upload finished in 0.83571s, attributes: file_id=243aac6fb32a
2025-05-26 13:06:11,296 SpawnPoolWorker-36 DEBUG    upload finished in 0.836167s, attributes: file_id=243aac6fb32a
upload:  31%|███       | 328/1056 [00:36<01:16,  9.57it/s]2025-05-26 13:06:11,298 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6b2eb91556a7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:11,513 SpawnPoolWorker-41 DEBUG    upload finished in 0.769941s, attributes: file_id=6be3dac8956d
2025-05-26 13:06:11,513 SpawnPoolWorker-41 DEBUG    upload finished in 0.770472s, attributes: file_id=6be3dac8956d
upload:  31%|███▏      | 330/1056 [00:36<01:16,  9.46it/s]2025-05-26 13:06:11,515 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/568096d4c1af.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:11,626 SpawnPoolWorker-35 DEBUG    upload finished in 1.033001s, attributes: file_id=1bf080e6ede9
2025-05-26 13:06:11,626 SpawnPoolWorker-35 DEBUG    upload finished in 1.033534s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:11,827 SpawnPoolWorker-40 DEBUG    upload finished in 0.894516s, attributes: file_id=a02d98dd885a
2025-05-26 13:06:11,828 SpawnPoolWorker-40 DEBUG    upload finished in 0.895036s, attributes: file_id=a02d98dd885a
2025-05-26 13:06:11,830 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c1942b9f9ed4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:11,935 SpawnPoolWorker-38 DEBUG    upload finished in 0.911073s, attributes: file_id=21480e4d1631
2025-05-26 13:06:11,935 SpawnPoolWorker-38 DEBUG    upload finished in 0.911562s, attributes: file_id=21480e4d1631
upload:  32%|███▏      |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:12,164 SpawnPoolWorker-36 DEBUG    upload finished in 0.866561s, attributes: file_id=6b2eb91556a7
2025-05-26 13:06:12,165 SpawnPoolWorker-36 DEBUG    upload finished in 0.867044s, attributes: file_id=6b2eb91556a7
upload:  32%|███▏      | 335/1056 [00:37<01:32,  7.76it/s]2025-05-26 13:06:12,166 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/585d12e985f0.json not detected as batch file data
2025-05-26 13:06:12,221 SpawnPoolWorker-34 DEBUG    upload finished in 0.998027s, attributes: file_id=c7df2df72a34
2025-05-26 13:06:12,221 SpawnPoolWorker-34 DEBUG    upload finished in 0.998524s, attributes: file_id=c7df2df72a34
2025-05-26 13:06:12,223 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/70acf6e55eab.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: ht

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:12,501 SpawnPoolWorker-41 DEBUG    upload finished in 0.986157s, attributes: file_id=568096d4c1af
2025-05-26 13:06:12,501 SpawnPoolWorker-41 DEBUG    upload finished in 0.986746s, attributes: file_id=568096d4c1af
2025-05-26 13:06:12,503 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/940faae81b1a.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:12,695 SpawnPoolWorker-37 DEBUG    upload finished in 1.006628s, attributes: file_id=de887096afec
2025-05-26 13:06:12,695 SpawnPoolWorker-37 DEBUG    upload finished in 1.007212s, attributes: file_id=de887096afec
upload:  32%|███▏      | 339/1056 [00:37<01:38,  7.29it/s]2025-05-26 13:06:12,697 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4cdd1b2ef8fc.json not detected as batch file data
A value is trying to be set on a copy of a slice from 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:12,802 SpawnPoolWorker-40 DEBUG    upload finished in 0.972874s, attributes: file_id=c1942b9f9ed4
2025-05-26 13:06:12,803 SpawnPoolWorker-40 DEBUG    upload finished in 0.97352s, attributes: file_id=c1942b9f9ed4
upload:  32%|███▏      | 340/1056 [00:37<01:34,  7.60it/s]2025-05-26 13:06:12,805 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/22561ed37c5a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:12,876 SpawnPoolWorker-36 DEBUG    upload finished in 0.71047s, attributes: file_id=585d12e985f0
2025-05-26 13:06:12,877 SpawnPoolWorker-36 DEBUG    upload finished in 0.710921s, attributes: file_id=585d12e985f0
2025-05-26 13:06:12,8

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:13,073 SpawnPoolWorker-39 DEBUG    upload finished in 0.7711s, attributes: file_id=0dd024e24a50
2025-05-26 13:06:13,074 SpawnPoolWorker-39 DEBUG    upload finished in 0.771721s, attributes: file_id=0dd024e24a50
2025-05-26 13:06:13,075 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d250237e82d6.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:13,149 SpawnPoolWorker-38 DEBUG    upload finished in 1.21262s, attributes: file_id=5a2dbee32f81
2025-05-26 13:06:13,149 SpawnPoolWorker-38 DEBUG    upload finished in 1.213103s, attributes: file_id=5a2dbee32f81
upload:  33%|███▎      | 34

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:13,336 SpawnPoolWorker-35 DEBUG    upload finished in 1.70857s, attributes: file_id=d9ef1073131e
2025-05-26 13:06:13,336 SpawnPoolWorker-35 DEBUG    upload finished in 1.709007s, attributes: file_id=d9ef1073131e
upload:  33%|███▎      | 346/1056 [00:38<01:13,  9.72it/s]2025-05-26 13:06:13,337 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fd01b8f4f649.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:13,698 SpawnPoolWorker-37 DEBUG    upload finished in 1.000812s, attributes: file_id=4cdd1b2ef8fc
2025-05-26 13:06:13,698 SpawnPoolWorker-37 DEBUG    upload finished in 1.001702s, attributes: file_id=4cdd1b2ef8fc
2025-05-26 13:06:13,701 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/207a9d4f4895.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:13,898 SpawnPoolWorker-36 DEBUG    upload finished in 1.020835s, attributes: file_id=54de75a0d4e1
2025-05-26 13:06:13,899 SpawnPoolWorker-36 DEBUG    upload finished in 1.021568s, attributes: file_id=54de75a0d4e1
upload:  33%|███▎      | 348/1056 [00:38<01:52,  6.28it/s]

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:13,902 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/dfff349ef510.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:14,139 SpawnPoolWorker-38 DEBUG    upload finished in 0.987878s, attributes: file_id=9871cae03383
2025-05-26 13:06:14,139 SpawnPoolWorker-38 DEBUG    upload finished in 0.988382s, attributes: file_id=9871cae03383
upload:  33%|███▎      | 349/1056 [00:39<02:02,  5.75it/s]2025-05-26 13:06:14,140 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0f116c3dad2a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in th

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:14,421 SpawnPoolWorker-39 DEBUG    upload finished in 1.346196s, attributes: file_id=d250237e82d6
2025-05-26 13:06:14,422 SpawnPoolWorker-39 DEBUG    upload finished in 1.34693s, attributes: file_id=d250237e82d6
upload:  33%|███▎      | 351/1056 [00:39<01:54,  6.18it/s]Removed trailing semicolon and whitespace from query
2025-05-26 13:06:14,424 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0c12a9467428.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:14,546 SpawnPoolWorker-40 DEBUG    upload finished in 1.74209s, attributes: file_id=22561ed37c5a
2025-05-26 13:06:14,547 SpawnPoolWorker-40 DEBUG    upload finished in 1.742749s, attributes: file_id=22561ed37c5a
upload:  33%|███▎    

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:14,649 SpawnPoolWorker-34 DEBUG    upload finished in 1.69565s, attributes: file_id=12bb6e5fbec1
2025-05-26 13:06:14,650 SpawnPoolWorker-34 DEBUG    upload finished in 1.696542s, attributes: file_id=12bb6e5fbec1
upload:  33%|███▎      | 353/1056 [00:39<01:38,  7.16it/s]Removed trailing semicolon and whitespace from query
2025-05-26 13:06:14,652 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ddb772fa8d91.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:14,760 SpawnPoolWorker-35 DEBUG    upload finished in 1.422702s, attributes: file_id=fd01b8f4f649
2025-05-26 13:06:14,760 SpawnPoolWorker-35 DEBUG    upload finished in 1.423174s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:14,881 SpawnPoolWorker-36 DEBUG    upload finished in 0.979519s, attributes: file_id=dfff349ef510
2025-05-26 13:06:14,881 SpawnPoolWorker-36 DEBUG    upload finished in 0.980141s, attributes: file_id=dfff349ef510
upload:  34%|███▎      | 355/1056 [00:39<01:30,  7.77it/s]2025-05-26 13:06:14,882 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c33bb55b246c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:14,969 SpawnPoolWorker-37 DEBUG    upload finished in 1.268616s, attributes: file_id=207a9d4f4895
2025-05-26 13:06:14,969 SpawnPoolWorker-37 DEBUG    upload finished in 1.26936s, attributes: file_id=207a9d4f4895
2025-05-26 13:06:14,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:15,188 SpawnPoolWorker-41 DEBUG    upload finished in 0.912567s, attributes: file_id=efe7215c8d32
2025-05-26 13:06:15,189 SpawnPoolWorker-41 DEBUG    upload finished in 0.913246s, attributes: file_id=efe7215c8d32
upload:  34%|███▍      | 358/1056 [00:40<01:19,  8.77it/s]2025-05-26 13:06:15,191 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0b2ba82be3bb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:15,420 SpawnPoolWorker-40 DEBUG    upload finished in 0.871282s, attributes: file_id=51002af197f9
2025-05-26 13:06:15,420 SpawnPoolWorker-40 DEBUG    upload finished in 0.871811s, attributes: file_id=51002af197f9
2025-05-26 13:06:15,421 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/247928580316.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:15,501 SpawnPoolWorker-35 DEBUG    upload finished in 0.739391s, attributes: file_id=f8f0043106e3
2025-05-26 13:06:15,501 SpawnPoolWorker-35 DEBUG    upload finished in 0.739985s, attributes: file_id=f8f0043106e3
upload:  34%|███▍      | 361/1056 [00:40<01:13,  9.51it/s]2025-05-26 13:06:15

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:15,763 SpawnPoolWorker-36 DEBUG    upload finished in 0.880872s, attributes: file_id=c33bb55b246c
2025-05-26 13:06:15,763 SpawnPoolWorker-36 DEBUG    upload finished in 0.881458s, attributes: file_id=c33bb55b246c
upload:  34%|███▍      | 363/1056 [00:40<01:19,  8.70it/s]2025-05-26 13:06:15,765 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/74ac23f25054.json not detected as batch file data
2025-05-26 13:06:15,866 SpawnPoolWorker-38 DEBUG    upload finished in 0.784717s, attributes: file_id=d7c3784ec29e
2025-05-26 13:06:15,866 SpawnPoolWorker-38 DEBUG    upload finished in 0.785291s, attributes: file_id=d7c3784ec29e
upload:  34%|███▍      | 364/1056 [00:40<01:17,  8.90it/s]2025-05-26 13:06:15,868 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/dab88f084826.json not detected as batch file d

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:16,020 SpawnPoolWorker-41 DEBUG    upload finished in 0.829438s, attributes: file_id=0b2ba82be3bb
2025-05-26 13:06:16,020 SpawnPoolWorker-41 DEBUG    upload finished in 0.830108s, attributes: file_id=0b2ba82be3bb
upload:  35%|███▍      | 366/1056 [00:41<01:08, 10.10it/s]2025-05-26 13:06:16,022 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/dd43df9711d9.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:16,085 SpawnPoolWorker-39 DEBUG    upload finished in 0.740462s, attributes: file_id=a4f93480a74d
2025-05-26 13:06:16,086 SpawnPoolWorker-39 DEBUG    upload finished in 0.740931s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:16,229 SpawnPoolWorker-40 DEBUG    upload finished in 0.808245s, attributes: file_id=247928580316
2025-05-26 13:06:16,230 SpawnPoolWorker-40 DEBUG    upload finished in 0.808812s, attributes: file_id=247928580316
upload:  35%|███▍      | 368/1056 [00:41<01:09,  9.90it/s]2025-05-26 13:06:16,231 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b6f431559a13.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:16,258 SpawnPoolWorker-35 DEBUG    upload finished in 0.75617s, attributes: file_id=cfc3199af611
2025-05-26 13:06:16,259 SpawnPoolWorker-35 DEBUG    upload finished in 0.756598s, attributes: file_id=cfc3199af611
2025-05-26 13:06:16,260 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:16,449 SpawnPoolWorker-36 DEBUG    upload finished in 0.684456s, attributes: file_id=74ac23f25054
2025-05-26 13:06:16,449 SpawnPoolWorker-36 DEBUG    upload finished in 0.684916s, attributes: file_id=74ac23f25054
2025-05-26 13:06:16,451 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/da337758756b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:16,630 SpawnPoolWorker-38 DEBUG    upload finished in 0.76277s, attributes: file_id=dab88f084826
upload:  35%|███▌      | 372/1056 [00:41<01:09,  9.90it/s]2025-05-26 13:06:16,631 SpawnPoolWorker-38 DEBUG    upload finished in 0.763349s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:16,750 SpawnPoolWorker-37 DEBUG    upload finished in 0.806273s, attributes: file_id=a1307b7d38d9
2025-05-26 13:06:16,750 SpawnPoolWorker-37 DEBUG    upload finished in 0.806893s, attributes: file_id=a1307b7d38d9
2025-05-26 13:06:16,752 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/40ea5c74f63f.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:16,797 SpawnPoolWorker-39 DEBUG    upload finished in 0.709947s, attributes: file_id=c56b7a8cde5e
2025-05-26 13:06:16,797 SpawnPoolWorker-39 DEBUG    upload finished in 0.710454s, attributes: file_id=c56b7a8cde5e
upload:  35%|███▌      | 374/1056 [00:41<01:05, 10.48it/s]2025-05-26 13:06:16

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:16,979 SpawnPoolWorker-35 DEBUG    upload finished in 0.718901s, attributes: file_id=f7c71393dd4b
2025-05-26 13:06:16,979 SpawnPoolWorker-35 DEBUG    upload finished in 0.719363s, attributes: file_id=f7c71393dd4b
upload:  36%|███▌      | 376/1056 [00:41<01:03, 10.64it/s]2025-05-26 13:06:16,980 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/efe6198e181e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:17,079 SpawnPoolWorker-34 DEBUG    upload finished in 0.656301s, attributes: file_id=f2100ff8d77d
2025-05-26 13:06:17,079 SpawnPoolWorker-34 DEBUG    upload finished in 0.656694s, attributes: file_id=f2100ff8d77d
2025-05-26 13:06:17

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:17,278 SpawnPoolWorker-36 DEBUG    upload finished in 0.828155s, attributes: file_id=da337758756b
2025-05-26 13:06:17,279 SpawnPoolWorker-36 DEBUG    upload finished in 0.828627s, attributes: file_id=da337758756b
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:17,280 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9f058d20565d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:17,348 SpawnPoolWorker-38 DEBUG    upload finished in 0.715857s, attributes: file_id=b0bc5515d9d7
2025-05-26 13:06:17,348 SpawnPoolWorker-38 DEBUG    upload finished in 0.71635s, attributes: file_id=b0bc5515d9d7
upload:  36%|███▌      | 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:17,499 SpawnPoolWorker-37 DEBUG    upload finished in 0.747197s, attributes: file_id=40ea5c74f63f
2025-05-26 13:06:17,499 SpawnPoolWorker-37 DEBUG    upload finished in 0.747692s, attributes: file_id=40ea5c74f63f
2025-05-26 13:06:17,501 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2f3a8fa6ea6e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:17,617 SpawnPoolWorker-39 DEBUG    upload finished in 0.819297s, attributes: file_id=5ff4c31d0a0e
2025-05-26 13:06:17,618 SpawnPoolWorker-39 DEBUG    upload finished in 0.819739s, attributes: file_id=5ff4c31d0a0e
upload:  36%|███▌      | 382/1056 [00:42<01:11,  9.42it/s]2025-05-26 13:06:17

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:17,699 SpawnPoolWorker-35 DEBUG    upload finished in 0.718422s, attributes: file_id=efe6198e181e
2025-05-26 13:06:17,699 SpawnPoolWorker-35 DEBUG    upload finished in 0.718868s, attributes: file_id=efe6198e181e
2025-05-26 13:06:17,700 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/31885037b214.json not detected as batch file data
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:17,874 SpawnPoolWorker-34 DEBUG    upload finished in 0.793647s, attributes: file_id=ed5f36a44775
2025-05-26 13:06:17,874 SpawnPoolWorker-34 DEBUG    upload finished in 0.794143s, attributes: file_id=ed5f36a44775
upload:  36%|███▋      | 385/1056 [00:42<01:05, 10.21it/s]2025-05-26 13:06:17,876 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f90e3b50768a.json not detected as batch file data
A

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:18,287 SpawnPoolWorker-37 DEBUG    upload finished in 0.786945s, attributes: file_id=2f3a8fa6ea6e
2025-05-26 13:06:18,288 SpawnPoolWorker-37 DEBUG    upload finished in 0.787464s, attributes: file_id=2f3a8fa6ea6e
upload:  37%|███▋      | 389/1056 [00:43<01:08,  9.73it/s]2025-05-26 13:06:18,289 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b3b5335ec3a0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:18,345 SpawnPoolWorker-35 DEBUG    upload finished in 0.645524s, attributes: file_id=31885037b214
2025-05-26 13:06:18,346 SpawnPoolWorker-35 DEBUG    upload finished in 0.645994s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:18,549 SpawnPoolWorker-40 DEBUG    upload finished in 0.908656s, attributes: file_id=a8aa3f9571c9
2025-05-26 13:06:18,549 SpawnPoolWorker-40 DEBUG    upload finished in 0.909128s, attributes: file_id=a8aa3f9571c9
2025-05-26 13:06:18,550 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/dc566d48be6b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:18,646 SpawnPoolWorker-34 DEBUG    upload finished in 0.77036s, attributes: file_id=f90e3b50768a
2025-05-26 13:06:18,646 SpawnPoolWorker-34 DEBUG    upload finished in 0.770881s, attributes: file_id=f90e3b50768a
upload:  37%|███▋      | 393/1056 [00:43<01:04, 10.21it/s]2025-05-26 13:06:18,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:18,824 SpawnPoolWorker-38 DEBUG    upload finished in 0.765698s, attributes: file_id=af4acfc9b601
2025-05-26 13:06:18,824 SpawnPoolWorker-38 DEBUG    upload finished in 0.766181s, attributes: file_id=af4acfc9b601
upload:  37%|███▋      | 395/1056 [00:43<01:03, 10.48it/s]2025-05-26 13:06:18,826 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/78c4f3f5a842.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:18,921 SpawnPoolWorker-41 DEBUG    upload finished in 0.985812s, attributes: file_id=77f777269c35
2025-05-26 13:06:18,921 SpawnPoolWorker-41 DEBUG    upload finished in 0.986326s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:19,063 SpawnPoolWorker-35 DEBUG    upload finished in 0.716219s, attributes: file_id=9af92fa22dd2
2025-05-26 13:06:19,063 SpawnPoolWorker-35 DEBUG    upload finished in 0.716715s, attributes: file_id=9af92fa22dd2
upload:  38%|███▊      | 397/1056 [00:44<01:07,  9.76it/s]2025-05-26 13:06:19,065 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3f39f96be5d8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:19,149 SpawnPoolWorker-37 DEBUG    upload finished in 0.860314s, attributes: file_id=b3b5335ec3a0
2025-05-26 13:06:19,149 SpawnPoolWorker-37 DEBUG    upload finished in 0.860809s, attributes: file_id=b3b5335ec3a0
2025-05-26 13:06:19

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:19,369 SpawnPoolWorker-40 DEBUG    upload finished in 0.81926s, attributes: file_id=dc566d48be6b
2025-05-26 13:06:19,370 SpawnPoolWorker-40 DEBUG    upload finished in 0.819781s, attributes: file_id=dc566d48be6b
2025-05-26 13:06:19,371 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0140d6f61a26.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:19,448 SpawnPoolWorker-34 DEBUG    upload finished in 0.800077s, attributes: file_id=33b86cde3337
2025-05-26 13:06:19,448 SpawnPoolWorker-34 DEBUG    upload finished in 0.800734s, attributes: file_id=33b86cde3337
upload:  38%|███▊      | 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:19,647 SpawnPoolWorker-38 DEBUG    upload finished in 0.821302s, attributes: file_id=78c4f3f5a842
2025-05-26 13:06:19,647 SpawnPoolWorker-38 DEBUG    upload finished in 0.821863s, attributes: file_id=78c4f3f5a842
upload:  38%|███▊      | 403/1056 [00:44<01:04, 10.07it/s]2025-05-26 13:06:19,650 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/67770b6ff6ed.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:19,723 SpawnPoolWorker-41 DEBUG    upload finished in 0.800388s, attributes: file_id=3f22ccb719d1
2025-05-26 13:06:19,723 SpawnPoolWorker-41 DEBUG    upload finished in 0.800844s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:19,930 SpawnPoolWorker-39 DEBUG    upload finished in 0.66899s, attributes: file_id=62762d4ef6ab
2025-05-26 13:06:19,931 SpawnPoolWorker-39 DEBUG    upload finished in 0.669472s, attributes: file_id=62762d4ef6ab
2025-05-26 13:06:19,932 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/16a91ecbd7ee.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:20,024 SpawnPoolWorker-37 DEBUG    upload finished in 0.873669s, attributes: file_id=896955dd5898
2025-05-26 13:06:20,025 SpawnPoolWorker-37 DEBUG    upload finished in 0.874176s, attributes: file_id=896955dd5898
upload:  39%|███▊      | 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:20,242 SpawnPoolWorker-36 DEBUG    upload finished in 0.707304s, attributes: file_id=e14135d42ece
2025-05-26 13:06:20,242 SpawnPoolWorker-36 DEBUG    upload finished in 0.707858s, attributes: file_id=e14135d42ece
upload:  39%|███▊      | 409/1056 [00:45<01:05,  9.93it/s]2025-05-26 13:06:20,244 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e5657d5719ee.json not detected as batch file data
2025-05-26 13:06:20,317 SpawnPoolWorker-34 DEBUG    upload finished in 0.867845s, attributes: file_id=d250f1a274c1
2025-05-26 13:06:20,317 SpawnPoolWorker-34 DEBUG    upload finished in 0.868264s, attributes: file_id=d250f1a274c1
2025-05-26 13:06:20,318 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/29905403403f.json not detected as batch file data
A value is trying to be set on a copy of a slice from 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:20,524 SpawnPoolWorker-41 DEBUG    upload finished in 0.799858s, attributes: file_id=535e330e0828
2025-05-26 13:06:20,524 SpawnPoolWorker-41 DEBUG    upload finished in 0.800329s, attributes: file_id=535e330e0828
2025-05-26 13:06:20,525 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/64b11cafa23a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:20,622 SpawnPoolWorker-38 DEBUG    upload finished in 0.972509s, attributes: file_id=67770b6ff6ed
2025-05-26 13:06:20,623 SpawnPoolWorker-38 DEBUG    upload finished in 0.973305s, attributes: file_id=67770b6ff6ed
upload:  39%|███▉      |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:20,732 SpawnPoolWorker-39 DEBUG    upload finished in 0.79957s, attributes: file_id=16a91ecbd7ee
2025-05-26 13:06:20,732 SpawnPoolWorker-39 DEBUG    upload finished in 0.800018s, attributes: file_id=16a91ecbd7ee
2025-05-26 13:06:20,733 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/20be6af929d7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:20,827 SpawnPoolWorker-37 DEBUG    upload finished in 0.800768s, attributes: file_id=c28ec5ee1049
upload:  39%|███▉      | 415/1056 [00:45<01:03, 10.06it/s]2025-05-26 13:06:20,827 SpawnPoolWorker-37 DEBUG    upload finished in 0.80126s, attributes: file_id=c28ec5ee1049
2025-05-26 13:06:20,8

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:21,027 SpawnPoolWorker-34 DEBUG    upload finished in 0.709102s, attributes: file_id=29905403403f
2025-05-26 13:06:21,027 SpawnPoolWorker-34 DEBUG    upload finished in 0.709548s, attributes: file_id=29905403403f
upload:  40%|███▉      | 418/1056 [00:46<00:55, 11.54it/s]2025-05-26 13:06:21,029 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/11ded53052fe.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:21,126 SpawnPoolWorker-35 DEBUG    upload finished in 0.701767s, attributes: file_id=3a0af0f23200
2025-05-26 13:06:21,127 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:21,270 SpawnPoolWorker-41 DEBUG    upload finished in 0.744926s, attributes: file_id=64b11cafa23a
2025-05-26 13:06:21,270 SpawnPoolWorker-41 DEBUG    upload finished in 0.745441s, attributes: file_id=64b11cafa23a
upload:  40%|███▉      | 420/1056 [00:46<01:01, 10.41it/s]2025-05-26 13:06:21,272 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/561b12ac3167.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:21,356 SpawnPoolWorker-38 DEBUG    upload finished in 0.732563s, attributes: file_id=c5e61c3c1731
2025-05-26 13:06:21,357 SpawnPoolWorker-38 DEBUG    upload finished in 0.733128s, attributes: file_id=c5e61c3c1731
2025-05-26 13:06:21

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:21,534 SpawnPoolWorker-37 DEBUG    upload finished in 0.705174s, attributes: file_id=82e3929a782e
2025-05-26 13:06:21,534 SpawnPoolWorker-37 DEBUG    upload finished in 0.705694s, attributes: file_id=82e3929a782e
2025-05-26 13:06:21,535 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/53e104d85fb7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:21,568 SpawnPoolWorker-40 DEBUG    upload finished in 0.645291s, attributes: file_id=bdf7be16cb79
2025-05-26 13:06:21,569 SpawnPoolWorker-40 DEBUG    upload finished in 0.645764s, attributes: file_id=bdf7be16cb79
upload:  40%|████      | 424/1056 [00:46<00:53, 11.75it/s]2025-05-26 13:06:21

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:21,808 SpawnPoolWorker-36 DEBUG    upload finished in 0.966952s, attributes: file_id=b68e3371d6cf
2025-05-26 13:06:21,808 SpawnPoolWorker-36 DEBUG    upload finished in 0.967491s, attributes: file_id=b68e3371d6cf
2025-05-26 13:06:21,810 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f207da965c5e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:21,888 SpawnPoolWorker-34 DEBUG    upload finished in 0.859431s, attributes: file_id=11ded53052fe
2025-05-26 13:06:21,888 SpawnPoolWorker-34 DEBUG    upload finished in 0.859863s, attributes: file_id=11ded53052fe
upload:  40%|████      | 426/1056 [00:46<01:07,  9.37it/s]2025-05-26 13:06:21,890 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:22,161 SpawnPoolWorker-41 DEBUG    upload finished in 0.889327s, attributes: file_id=561b12ac3167
2025-05-26 13:06:22,161 SpawnPoolWorker-41 DEBUG    upload finished in 0.889855s, attributes: file_id=561b12ac3167
upload:  41%|████      | 428/1056 [00:47<01:12,  8.66it/s]2025-05-26 13:06:22,163 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/25aa9170c232.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:22,235 SpawnPoolWorker-39 DEBUG    upload finished in 0.801755s, attributes: file_id=587351a0e2fa
2025-05-26 13:06:22,235 SpawnPoolWorker-39 DEBUG    upload finished in 0.802261s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:22,546 SpawnPoolWorker-38 DEBUG    upload finished in 1.188348s, attributes: file_id=5a7671087de8
2025-05-26 13:06:22,546 SpawnPoolWorker-38 DEBUG    upload finished in 1.188849s, attributes: file_id=5a7671087de8
upload:  41%|████      | 432/1056 [00:47<01:07,  9.29it/s]2025-05-26 13:06:22,548 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/646e4e17142a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:22,676 SpawnPoolWorker-36 DEBUG    upload finished in 0.866327s, attribut

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:22,794 SpawnPoolWorker-34 DEBUG    upload finished in 0.905305s, attributes: file_id=3e8edc74ae29
2025-05-26 13:06:22,795 SpawnPoolWorker-34 DEBUG    upload finished in 0.905788s, attributes: file_id=3e8edc74ae29
upload:  41%|████      | 434/1056 [00:47<01:10,  8.86it/s]2025-05-26 13:06:22,796 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6f0d651d1cf0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:22,837 SpawnPoolWorker-35 DEBUG    upload finished in 0.868184s, attributes: file_id=4556aa37dc0f
2025-05-26 13:06:22,837 SpawnPoolWorker-35 DEBUG    upload finished in 0.868602s, attributes: file_id=4556aa37dc0f
2025-05-26 13:06:22

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:23,018 SpawnPoolWorker-39 DEBUG    upload finished in 0.782045s, attributes: file_id=8665ecc712b6
2025-05-26 13:06:23,019 SpawnPoolWorker-39 DEBUG    upload finished in 0.782868s, attributes: file_id=8665ecc712b6
upload:  41%|████▏     | 436/1056 [00:48<01:09,  8.88it/s]2025-05-26 13:06:23,022 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f97476feafbb.json not detected as batch file data
2025-05-26 13:06:23,080 SpawnPoolWorker-37 DEBUG    upload finished in 0.724119s, attributes: file_id=3dc7bb186dea
2025-05-26 13:06:23,081 SpawnPoolWorker-37 DEBUG    upload finished in 0.724638s, attributes: file_id=3dc7bb186dea
2025-05-26 13:06:23,082 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ad7e0152a61c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:23,271 SpawnPoolWorker-40 DEBUG    upload finished in 0.956684s, attributes: file_id=6983dd59d635
2025-05-26 13:06:23,272 SpawnPoolWorker-40 DEBUG    upload finished in 0.957157s, attributes: file_id=6983dd59d635
2025-05-26 13:06:23,273 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/819a4e0cf13e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:23,332 SpawnPoolWorker-38 DEBUG    upload finished in 0.784468s, attributes: file_id=646e4e17142a
2025-05-26 13:06:23,332 SpawnPoolWorker-38 DEBUG    upload finished in 0.784987s, attributes: file_id=646e4e17142a
upload:  42%|████▏     | 440/1056 [00:48<00:58, 10.58it/s]2025-05-26 13:06:23

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:23,476 SpawnPoolWorker-36 DEBUG    upload finished in 0.797872s, attributes: file_id=bbafe4f4ae02
2025-05-26 13:06:23,476 SpawnPoolWorker-36 DEBUG    upload finished in 0.798498s, attributes: file_id=bbafe4f4ae02
2025-05-26 13:06:23,478 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/be2e96cf060b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:23,613 SpawnPoolWorker-34 DEBUG    upload finished in 0.816974s, attributes: file_id=6f0d651d1cf0
2025-05-26 13:06:23,613 SpawnPoolWorker-34 DEBUG    upload finished in 0.817449s, attributes: file_id=6f0d651d1cf0
upload:  42%|████▏     | 442/1056 [00:48<01:07,  9.16it/s]2025-05-26 13:06:23

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:23,766 SpawnPoolWorker-41 DEBUG    upload finished in 0.610214s, attributes: file_id=d192b57dc17f
2025-05-26 13:06:23,766 SpawnPoolWorker-41 DEBUG    upload finished in 0.61058s, attributes: file_id=d192b57dc17f
upload:  42%|████▏     | 444/1056 [00:48<01:00, 10.11it/s]2025-05-26 13:06:23,768 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f75e4c9270a7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:23,852 SpawnPoolWorker-37 DEBUG    upload finished in 0.770133s, attributes: file_id=ad7e0152a61c
2025-05-26 13:06:23,853 SpawnPoolWorker-37 DEBUG    upload finished in 0.770624s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:24,005 SpawnPoolWorker-40 DEBUG    upload finished in 0.73172s, attributes: file_id=819a4e0cf13e
2025-05-26 13:06:24,005 SpawnPoolWorker-40 DEBUG    upload finished in 0.732149s, attributes: file_id=819a4e0cf13e
2025-05-26 13:06:24,006 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c1954ce55ee1.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:24,126 SpawnPoolWorker-38 DEBUG    upload finished in 0.79197s, attributes: file_id=30a6a0bb97b1
2025-05-26 13:06:24,126 SpawnPoolWorker-38 DEBUG    upload finished in 0.792423s, attributes: file_id=30a6a0bb97b1
upload:  42%|████▏     | 4

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:24,190 SpawnPoolWorker-36 DEBUG    upload finished in 0.71285s, attributes: file_id=be2e96cf060b
2025-05-26 13:06:24,190 SpawnPoolWorker-36 DEBUG    upload finished in 0.713311s, attributes: file_id=be2e96cf060b
2025-05-26 13:06:24,192 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/50958add8224.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:24,233 SpawnPoolWorker-39 DEBUG    upload finished in 0.592675s, attributes: file_id=f50cd1806d9d
2025-05-26 13:06:24,233 SpawnPoolWorker-39 DEBUG    upload finished in 0.593097s, attributes: file_id=f50cd1806d9d
upload:  43%|████▎     | 450/1056 [00:49<00:49, 12.13it/s]2025-05-26 13:06:24,235 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:24,592 SpawnPoolWorker-37 DEBUG    upload finished in 0.738164s, attributes: file_id=d27bb6f11092
2025-05-26 13:06:24,592 SpawnPoolWorker-37 DEBUG    upload finished in 0.738738s, attributes: file_id=d27bb6f11092
upload:  43%|████▎     | 452/1056 [00:49<01:07,  8.94it/s]2025-05-26 13:06:24,594 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/02b3cd1e6c45.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:24,698 SpawnPoolWorker-35 DEBUG    upload finished in 0.768591s, attributes: file_id=bd7e97e03ca6
2025-05-26 13:06:24,698 SpawnPoolWorker-35 DEBUG    upload finished in 0.769061s, attributes: file_id=bd7e97e03ca6
2025-05-26 13:06:24

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:24,899 SpawnPoolWorker-41 DEBUG    upload finished in 1.132343s, attributes: file_id=f75e4c9270a7
2025-05-26 13:06:24,900 SpawnPoolWorker-41 DEBUG    upload finished in 1.132799s, attributes: file_id=f75e4c9270a7
upload:  43%|████▎     | 454/1056 [00:49<01:14,  8.03it/s]2025-05-26 13:06:24,901 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1e84f9591f2f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:24,994 SpawnPoolWorker-39 DEBUG    upload finished in 0.760068s, attributes: file_id=06b34690a708
2025-05-26 13:06:24,995 SpawnPoolWorker-39 DEBUG    upload finished in 0.760586s, attributes: file_id=06b34690a708
2025-05-26 13:06:24

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:25,193 SpawnPoolWorker-40 DEBUG    upload finished in 1.187252s, attributes: file_id=c1954ce55ee1
2025-05-26 13:06:25,194 SpawnPoolWorker-40 DEBUG    upload finished in 1.187723s, attributes: file_id=c1954ce55ee1
upload:  43%|████▎     | 457/1056 [00:50<01:07,  8.82it/s]2025-05-26 13:06:25,196 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0e65fd8286be.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:25,406 SpawnPoolWorker-34 DEBUG    upload finished in 1.038497s, attributes: file_id=53b37310692d
2025-05-26 13:06:25,406 SpawnPoolWorker-34 DEBUG    upload finished in 1.039044s, attributes: file_id=53b37310692d
upload:  43%|████▎     | 458/1056 [00:50<01:19,  7.50it/s]2025-05-26 13:06:25,409 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ce9d3ac8c9c9.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:25,567 SpawnPoolWorker-35 DEBUG    upload finished in 0.867802s, attributes: file_id=b3606295e01b
2025-05-26 13:06:25,568 SpawnPoolWorker-35 DEBUG    upload finished in 0.868295s, attributes: file_id=b3606295e01b
upload:  43%|████▎ 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:25,646 SpawnPoolWorker-37 DEBUG    upload finished in 1.052538s, attributes: file_id=02b3cd1e6c45
2025-05-26 13:06:25,646 SpawnPoolWorker-37 DEBUG    upload finished in 1.053019s, attributes: file_id=02b3cd1e6c45
2025-05-26 13:06:25,647 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/435c26f607ec.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:25,734 SpawnPoolWorker-36 DEBUG    upload finished in 1.542489s, attributes: file_id=50958add8224
2025-05-26 13:06:25,734 SpawnPoolWorker-36 DEBUG    upload finished in 1.542878s, attributes: file_id=50958add8224
upload:  44%|████▎     | 461/1056 [00:50<01:10,  8.50it/s]2025-05-26 13:06:25

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:25,851 SpawnPoolWorker-39 DEBUG    upload finished in 0.854604s, attributes: file_id=678b09a5c55b
2025-05-26 13:06:25,851 SpawnPoolWorker-39 DEBUG    upload finished in 0.855069s, attributes: file_id=678b09a5c55b
upload:  44%|████▍     | 462/1056 [00:50<01:09,  8.51it/s]2025-05-26 13:06:25,852 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c167e36596f6.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:25,921 SpawnPoolWorker-40 DEBUG    upload finished in 0.725663s, attributes: file_id=0e65fd8286be
2025-05-26 13:06:25,921 SpawnPoolWorker-40 DEBUG    upload finished in 0.726218s, attributes: file_id=0e65fd8286be
2025-05-26 13:06:25

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:26,140 SpawnPoolWorker-34 DEBUG    upload finished in 0.732084s, attributes: file_id=ce9d3ac8c9c9
2025-05-26 13:06:26,140 SpawnPoolWorker-34 DEBUG    upload finished in 0.732619s, attributes: file_id=ce9d3ac8c9c9
upload:  44%|████▍     | 465/1056 [00:51<01:03,  9.26it/s]2025-05-26 13:06:26,141 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/247cbd93b06b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:26,219 SpawnPoolWorker-38 DEBUG    upload finished in 1.125343s, attributes: file_id=2e3ddd48e711
2025-05-26 13:06:26,219 SpawnPoolWorker-38 DEBUG    upload finished in 1.125852s, attributes: file_id=2e3ddd48e711
2025-05-26 13:06:26

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:26,449 SpawnPoolWorker-35 DEBUG    upload finished in 0.88038s, attributes: file_id=9fa8ddf4dd2c
2025-05-26 13:06:26,449 SpawnPoolWorker-35 DEBUG    upload finished in 0.880914s, attributes: file_id=9fa8ddf4dd2c
upload:  44%|████▍     | 467/1056 [00:51<01:14,  7.95it/s]2025-05-26 13:06:26,451 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d80bc2151d77.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:26,739 SpawnPoolWorker-36 DEBUG    upload finished in 1.003741s, attributes: file_id=988a4fa1d07d
2025-05-26 13:06:26,739 SpawnPoolWorker-36 DEBUG    upload finished in 1.004173s, attributes: file_id=988a4fa1d07d
upload:  44%|████▍     | 468/1056 [00:51<01:34,  6.22it/s]2025-05-26 13:06:26,741 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/966e8054c9f8.json not detected as batch file data
2025-05-26 13:06:26,830 SpawnPoolWorker-40 DEBUG    upload finished in 0.907956s, attributes: file_id=7bf67b71c872
2025-05-26 13:06:26,831 SpawnPoolWorker-40 DEBUG    upload finished in 0.908482s, attributes: file_id=7bf67b71c872
2025-05-26 13:06:26,833 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4e4cd4944d7a.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:26,936 SpawnPoolWorker-41 DEBUG    upload finished in 0.914098s, attributes: file_id=89878

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:26,943 SpawnPoolWorker-38 DEBUG    upload finished in 0.722169s, attributes: file_id=a2f455c9b97c
2025-05-26 13:06:26,943 SpawnPoolWorker-38 DEBUG    upload finished in 0.722772s, attributes: file_id=a2f455c9b97c
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:26,945 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7efb9284a55f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:27,033 SpawnPoolWorker-39 DEBUG    upload

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:27,147 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/88b4ab4b6781.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:27,235 SpawnPoolWorker-34 DEBUG    upload finished in 1.093897s, attributes: file_id=247cbd93b06b
2025-05-26 13:06:27,235 SpawnPoolWorker-34 DEBUG    upload finished in 1.094469s, attributes: file_id=247cbd93b06b
2025-05-26 13:06:27,237 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e5adf563723a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/sta

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:27,367 SpawnPoolWorker-35 DEBUG    upload finished in 0.916045s, attributes: file_id=d80bc2151d77
2025-05-26 13:06:27,367 SpawnPoolWorker-35 DEBUG    upload finished in 0.91666s, attributes: file_id=d80bc2151d77
upload:  45%|████▍     | 475/1056 [00:52<01:02,  9.25it/s]2025-05-26 13:06:27,370 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c6a716652907.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:27,409 SpawnPoolWorker-36 DEBUG    upload finished in 0.668163s, attributes: file_id=966e8054c9f8
2025-05-26 13:06:27,409 SpawnPoolWorker-36 DEBUG    upload finished in 0.668667s, attributes: file_id=966e8054c9f8
2025-05-26 13:06:27,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:27,640 SpawnPoolWorker-38 DEBUG    upload finished in 0.695671s, attributes: file_id=7efb9284a55f
2025-05-26 13:06:27,640 SpawnPoolWorker-38 DEBUG    upload finished in 0.696206s, attributes: file_id=7efb9284a55f
upload:  45%|████▌     | 478/1056 [00:52<00:58,  9.86it/s]2025-05-26 13:06:27,642 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/712803ca2d70.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:27,759 SpawnPoolWorker-39 DEBUG    upload finished in 0.724558s, attributes: file_id=04091b97a6bd
2025-05-26 13:06:27,759 SpawnPoolWorker-39 DEBUG    upload finished in 0.725011s, attributes: file_id=04091b97a6bd
2025-05-26 13:06:27,762 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:27,857 SpawnPoolWorker-41 DEBUG    upload finished in 0.919564s, attributes: file_id=6613713a74fb
2025-05-26 13:06:27,858 SpawnPoolWorker-41 DEBUG    upload finished in 0.920067s, attributes: file_id=6613713a74fb
upload:  45%|████▌     | 480/1056 [00:52<00:59,  9.67it/s]2025-05-26 13:06:27,859 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/68a2183fdca2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:27,958 SpawnPoolWorker-34 DEBUG    upload finished in 0.721729s, attributes: file_id=e5adf563723a
2025-05-26 13:06:27,958 SpawnPoolWorker-34 DEBUG    upload finished in 0.722166s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:28,074 SpawnPoolWorker-37 DEBUG    upload finished in 0.927388s, attributes: file_id=88b4ab4b6781
2025-05-26 13:06:28,075 SpawnPoolWorker-37 DEBUG    upload finished in 0.92803s, attributes: file_id=88b4ab4b6781
upload:  46%|████▌     | 482/1056 [00:53<01:00,  9.48it/s]2025-05-26 13:06:28,076 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8e1aa07cc5ea.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:28,158 SpawnPoolWorker-40 DEBUG    upload finished in 0.701312s, attributes: file_id=e60e9bca8bf8
2025-05-26 13:06:28,158 SpawnPoolWorker-40 DEBUG    upload finished in 0.701727s, attributes: file_id=e60e9bca8bf8
2025-05-26 13:06:28,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:28,318 SpawnPoolWorker-36 DEBUG    upload finished in 0.908274s, attributes: file_id=82e45cba2052
2025-05-26 13:06:28,319 SpawnPoolWorker-36 DEBUG    upload finished in 0.908845s, attributes: file_id=82e45cba2052
upload:  46%|████▌     | 484/1056 [00:53<01:03,  8.99it/s]2025-05-26 13:06:28,320 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/28176407b9a5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:28,398 SpawnPoolWorker-35 DEBUG    upload finished in 1.029211s, attributes: file_id=c6a716652907
2025-05-26 13:06:28,398 SpawnPoolWorker-35 DEBUG    upload finished in 1.029794s, attributes: file_id=c6a716652907
2025-05-26 13:06:28,401 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:28,540 SpawnPoolWorker-38 DEBUG    upload finished in 0.898862s, attributes: file_id=712803ca2d70
2025-05-26 13:06:28,541 SpawnPoolWorker-38 DEBUG    upload finished in 0.899446s, attributes: file_id=712803ca2d70
2025-05-26 13:06:28,542 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/993bf7caaae2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:28,636 SpawnPoolWorker-41 DEBUG    upload finished in 0.777246s, attributes: file_id=68a2183fdca2
2025-05-26 13:06:28,636 SpawnPoolWorker-41 DEBUG    upload finished in 0.777794s, attributes: file_id=68a2183fdca2
upload:  46%|████▌     | 488/1056 [00:53<00:53, 10.56it/s]2025-05-26 13:06:28

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:28,771 SpawnPoolWorker-34 DEBUG    upload finished in 0.811875s, attributes: file_id=07c09ce2f45e
2025-05-26 13:06:28,772 SpawnPoolWorker-34 DEBUG    upload finished in 0.812347s, attributes: file_id=07c09ce2f45e
2025-05-26 13:06:28,774 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f78bdb9e0a94.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:28,840 SpawnPoolWorker-40 DEBUG    upload finished in 0.680529s, attributes: file_id=16b4d9a43f2d
2025-05-26 13:06:28,840 SpawnPoolWorker-40 DEBUG    upload finished in 0.681011s, attributes: file_id=16b4d9a43f2d
upload:  46%|████▋     | 490/1056 [00:53<00:54, 10.30it/s]2025-05-26 13:06:28

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:28,990 SpawnPoolWorker-37 DEBUG    upload finished in 0.914449s, attributes: file_id=8e1aa07cc5ea
2025-05-26 13:06:28,991 SpawnPoolWorker-37 DEBUG    upload finished in 0.914918s, attributes: file_id=8e1aa07cc5ea
2025-05-26 13:06:28,993 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d6ced8e00da7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:29,033 SpawnPoolWorker-36 DEBUG    upload finished in 0.7136s, attributes: file_id=28176407b9a5
2025-05-26 13:06:29,033 SpawnPoolWorker-36 DEBUG    upload finished in 0.714039s, attributes: file_id=28176407b9a5
upload:  47%|████▋     | 492/1056 [00:54<00:54, 10.32it/s]2025-05-26 13:06:29,0

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:29,241 SpawnPoolWorker-38 DEBUG    upload finished in 0.698919s, attributes: file_id=993bf7caaae2
2025-05-26 13:06:29,241 SpawnPoolWorker-38 DEBUG    upload finished in 0.699439s, attributes: file_id=993bf7caaae2
upload:  47%|████▋     | 494/1056 [00:54<00:55, 10.10it/s]2025-05-26 13:06:29,243 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ebea2885521a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:29,353 SpawnPoolWorker-39 DEBUG    upload finished in 0.893376s, attributes: file_id=7bc78e9b2de8
2025-05-26 13:06:29,353 SpawnPoolWorker-39 DEBUG    upload finished in 0.893898s, attributes: file_id=7bc78e9b2de8
2025-05-26 13:06:29

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:29,457 SpawnPoolWorker-41 DEBUG    upload finished in 0.819109s, attributes: file_id=a6b6253aa91b
2025-05-26 13:06:29,457 SpawnPoolWorker-41 DEBUG    upload finished in 0.819666s, attributes: file_id=a6b6253aa91b
upload:  47%|████▋     | 496/1056 [00:54<00:56,  9.83it/s]2025-05-26 13:06:29,459 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/15ec535ced3b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:29,597 SpawnPoolWorker-34 DEBUG    upload finished in 0.823865s, attributes: file_id=f78bdb9e0a94
2025-05-26 13:06:29,597 SpawnPoolWorker-34 DEBUG    upload finished in 0.824386s, attributes: file_id=f78bdb9e0a94
upload:  47%|████▋ 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:29,663 SpawnPoolWorker-40 DEBUG    upload finished in 0.822029s, attributes: file_id=64f704dbf808
2025-05-26 13:06:29,664 SpawnPoolWorker-40 DEBUG    upload finished in 0.822489s, attributes: file_id=64f704dbf808
2025-05-26 13:06:29,665 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bdf1dc1e7fa7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:29,789 SpawnPoolWorker-36 DEBUG    upload finished in 0.754112s, attributes: file_id=d180aeb6f489
2025-05-26 13:06:29,789 SpawnPoolWorker-36 DEBUG    upload finished in 0.754585s, attributes: file_id=d180aeb6f489
upload:  47%|████▋     |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:29,909 SpawnPoolWorker-37 DEBUG    upload finished in 0.916773s, attributes: file_id=d6ced8e00da7
2025-05-26 13:06:29,909 SpawnPoolWorker-37 DEBUG    upload finished in 0.917248s, attributes: file_id=d6ced8e00da7
upload:  47%|████▋     | 500/1056 [00:54<00:59,  9.32it/s]2025-05-26 13:06:29,911 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bda38eaa1fac.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:29,974 SpawnPoolWorker-38 DEBUG    upload finished in 0.73206s, attributes: file_id=ebea2885521a
2025-05-26 13:06:29,975 SpawnPoolWorker-38 DEBUG    upload finished in 0.732526s, attributes: file_id=ebea2885521a
2025-05-26 13:06:29,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:30,217 SpawnPoolWorker-34 DEBUG    upload finished in 0.618395s, attributes: file_id=546f6d626bdd
2025-05-26 13:06:30,217 SpawnPoolWorker-34 DEBUG    upload finished in 0.618952s, attributes: file_id=546f6d626bdd
upload:  48%|████▊     | 503/1056 [00:55<00:58,  9.45it/s]2025-05-26 13:06:30,219 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c05f6e12264c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:30,322 SpawnPoolWorker-41 DEBUG    upload finished in 0.863876s, attributes: file_id=15ec535ced3b
2025-05-26 13:06:30,323 SpawnPoolWorker-41 DEBUG    upload finished in 0.864359s, attributes: file_id=15ec535ced3b
upload:  48%|████▊ 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:30,519 SpawnPoolWorker-39 DEBUG    upload finished in 1.164933s, attributes: file_id=903ae06fdb23
2025-05-26 13:06:30,520 SpawnPoolWorker-39 DEBUG    upload finished in 1.165352s, attributes: file_id=903ae06fdb23
upload:  48%|████▊     | 506/1056 [00:55<00:56,  9.73it/s]2025-05-26 13:06:30,521 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1541bfdcfc0b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:30,623 SpawnPoolWorker-37 DEBUG    upload finished in 0.713234s, attributes: file_id=bda38eaa1fac
2025-05-26 13:06:30,624 SpawnPoolWorker-37 DEBUG    upload finished in 0.713843s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:30,729 SpawnPoolWorker-38 DEBUG    upload finished in 0.75306s, attributes: file_id=802f4c9b2c89
2025-05-26 13:06:30,729 SpawnPoolWorker-38 DEBUG    upload finished in 0.753624s, attributes: file_id=802f4c9b2c89
upload:  48%|████▊     | 508/1056 [00:55<00:56,  9.64it/s]2025-05-26 13:06:30,732 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fdd0c10e251d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:30,842 SpawnPoolWorker-36 DEBUG    upload finished in 1.051733s, attributes: file_id=86d586fa0497
2025-05-26 13:06:30,843 SpawnPoolWorker-36 DEBUG    upload finished in 1.052446s, attributes: file_id=86d586fa0497
upload:  48%|████▊  

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:30,935 SpawnPoolWorker-35 DEBUG    upload finished in 0.830233s, attributes: file_id=7928604bacca
2025-05-26 13:06:30,936 SpawnPoolWorker-35 DEBUG    upload finished in 0.830684s, attributes: file_id=7928604bacca
2025-05-26 13:06:30,937 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bde8c7c94187.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:31,011 SpawnPoolWorker-41 DEBUG    upload finished in 0.68722s, attributes: file_id=b78d23ff9a15
2025-05-26 13:06:31,011 SpawnPoolWorker-41 DEBUG    upload finished in 0.687641s, attributes: file_id=b78d23ff9a15
upload:  48%|████▊     | 511/1056 [00:55<00:52, 10.31it/s]2025-05-26 13:06:31,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:31,138 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1f3b582d3590.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:31,232 SpawnPoolWorker-40 DEBUG    upload finished in 0.867606s, attributes: file_id=47353925582c
2025-05-26 13:06:31,233 SpawnPoolWorker-40 DEBUG    upload finished in 0.86822s, attributes: file_id=47353925582c
upload:  49%|████▊     | 513/1056 [00:56<00:55,  9.79it/s]2025-05-26 13:06:31,235 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e879eb958ba7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:31,352 SpawnPoolWorker-39 DEBUG    upload finished in 0.831046s, attributes: file_id=1541bfdcfc0b
2025-05-26 13:06:31,352 SpawnPoolWorker-39 DEBUG    upload finished in 0.83155s, attributes: file_id=1541bfdcfc0b
upload:  49%|████▊     | 514/1056 [00:56<00:57,  9.46it/s]Removed trailing semicolon and whitespace from query
2025-05-26 13:06:31,355 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/19baf1fdefd7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:31,435 SpawnPoolWorker-37 DEBUG    upload finished in 0.81021s, attributes: file_id=de2f63c19dcc
2025-05-26 13:06:31,436 SpawnPoolWorker-37 DEBUG    upload finished in 0.810803s, attributes: file_id=de2f63c19dcc
2025-05-26 13:06:31,4

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:31,576 SpawnPoolWorker-36 DEBUG    upload finished in 0.731741s, attributes: file_id=614717f4fdb1
2025-05-26 13:06:31,576 SpawnPoolWorker-36 DEBUG    upload finished in 0.732337s, attributes: file_id=614717f4fdb1
upload:  49%|████▉     | 516/1056 [00:56<00:58,  9.25it/s]2025-05-26 13:06:31,579 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3d5399568b61.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:31,695 SpawnPoolWorker-38 DEBUG    upload finished in 0.964245s, attributes: file_id=fdd0c10e251d
2025-05-26 13:06:31,696 SpawnPoolWorker-38 DEBUG    upload finished in 0.964868s, attributes: file_id=fdd0c10e251d
upload:  49%|████▉ 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:31,837 SpawnPoolWorker-41 DEBUG    upload finished in 0.824025s, attributes: file_id=0cbcd5252acc
2025-05-26 13:06:31,840 SpawnPoolWorker-41 DEBUG    upload finished in 0.827233s, attributes: file_id=0cbcd5252acc
upload:  49%|████▉     | 518/1056 [00:56<01:03,  8.45it/s]2025-05-26 13:06:31,845 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3e45508b3e26.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:31,906 SpawnPoolWorker-34 DEBUG    upload finished in 0.769725s, attributes: file_id=1f3b582d3590
2025-05-26 13:06:31,907 SpawnPoolWorker-34 DEBUG    upload finished in 0.770196s, attributes: file_id=1f3b582d3590
2025-05-26 13:06:31

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:32,094 SpawnPoolWorker-40 DEBUG    upload finished in 0.859181s, attributes: file_id=e879eb958ba7
2025-05-26 13:06:32,094 SpawnPoolWorker-40 DEBUG    upload finished in 0.859749s, attributes: file_id=e879eb958ba7
upload:  49%|████▉     | 521/1056 [00:57<00:54,  9.84it/s]2025-05-26 13:06:32,095 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/021ca8bc893d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:32,327 SpawnPoolWorker-39 DEBUG    upload finished in 0.973071s, attributes: file_id=19baf1fdefd7
2025-05-26 13:06:32,327 SpawnPoolWorker-39 DEBUG    upload finished in 0.973646s, attributes: file_id=19baf1fdefd7
upload:  49%|████▉     | 522/1056 [00:57<01:11,  7.50it/s]2025-05-26 13:06:32,329 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9562459a5cdb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:32,513 SpawnPoolWorker-37 DEBUG    upload finished in 1.07635s, attributes: file_id=c32e22df4772
2025-05-26 13:06:32,514 SpawnPoolWorker-37 DEBUG    upload finished in 1.077027s, attributes: file_id=c32e22df4772
upload:  50%|████▉  

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:32,563 SpawnPoolWorker-34 DEBUG    upload finished in 0.655237s, attributes: file_id=89efc08f6543
2025-05-26 13:06:32,563 SpawnPoolWorker-34 DEBUG    upload finished in 0.655678s, attributes: file_id=89efc08f6543
2025-05-26 13:06:32,565 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bdb714053945.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:32,642 SpawnPoolWorker-41 DEBUG    upload finished in 0.798591s, attributes: file_id=3e45508b3e26
2025-05-26 13:06:32,643 SpawnPoolWorker-41 DEBUG    upload finished in 0.799772s, attributes: file_id=3e45508b3e26
upload:  50%|████▉     | 525/1056 [00:57<00:59,  8.92it/s]2025-05-26 13:06:32,645 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:32,791 SpawnPoolWorker-40 DEBUG    upload finished in 0.69639s, attributes: file_id=021ca8bc893d
2025-05-26 13:06:32,792 SpawnPoolWorker-40 DEBUG    upload finished in 0.696839s, attributes: file_id=021ca8bc893d
upload:  50%|████▉     | 527/1056 [00:57<00:51, 10.21it/s]2025-05-26 13:06:32,795 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/567ec5bf4a96.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:32,931 SpawnPoolWorker-38 DEBUG    upload finished in 1.234712s, attributes: file_id=3a5b71c4e31a
2025-05-26 13:06:32,932 SpawnPoolWorker-38 DEBUG    upload finished in 1.235164s, attributes: file_id=3a5b71c4e31a
2025-05-26 13:06:32,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:33,047 SpawnPoolWorker-39 DEBUG    upload finished in 0.718611s, attributes: file_id=9562459a5cdb
2025-05-26 13:06:33,047 SpawnPoolWorker-39 DEBUG    upload finished in 0.71911s, attributes: file_id=9562459a5cdb
upload:  50%|█████     | 529/1056 [00:58<00:57,  9.23it/s]2025-05-26 13:06:33,051 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/be5835095a0f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:33,151 SpawnPoolWorker-35 DEBUG    upload finished in 1.159252s, attributes: file_id=99911038e538
2025-05-26 13:06:33,152 SpawnPoolWorker-35 DEBUG    upload finished in 1.159748s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:33,462 SpawnPoolWorker-34 DEBUG    upload finished in 0.897503s, attributes: file_id=bdb714053945
2025-05-26 13:06:33,462 SpawnPoolWorker-34 DEBUG    upload finished in 0.898023s, attributes: file_id=bdb714053945
upload:  50%|█████     | 532/1056 [00:58<01:05,  8.01it/s]2025-05-26 13:06:33,465 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ded8358d281b.json not detected as batch file data
2025-05-26 13:06:33,535 SpawnPoolWorker-36 DEBUG    upload finished in 0.818075s, attributes: file_id=561f1fe1b3b2
2025-05-26 13:06:33,535 SpawnPoolWorker-36 DEBUG    upload finished in 0.818696s, attributes: file_id=561f1fe1b3b2
2025-05-26 13:06:33,538 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3b8f6358e50b.json not detected as batch file data
A value is trying to be set on a copy of a slice from 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:33,733 SpawnPoolWorker-40 DEBUG    upload finished in 0.939262s, attributes: file_id=567ec5bf4a96
2025-05-26 13:06:33,733 SpawnPoolWorker-40 DEBUG    upload finished in 0.939715s, attributes: file_id=567ec5bf4a96
upload:  51%|█████     | 535/1056 [00:58<00:57,  9.10it/s]2025-05-26 13:06:33,734 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/37272a0b2b61.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:33,930 SpawnPoolWorker-38 DEBUG    upload finished in 0.997107s, attributes: file_id=030ef20a28e1
2025-05-26 13:06:33,930 SpawnPoolWorker-38 DEBUG    upload finished in 0.997688s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:34,068 SpawnPoolWorker-37 DEBUG    upload finished in 0.82435s, attributes: file_id=4bea894d2275
2025-05-26 13:06:34,069 SpawnPoolWorker-37 DEBUG    upload finished in 0.824926s, attributes: file_id=4bea894d2275
upload:  51%|█████     | 537/1056 [00:59<01:08,  7.61it/s]2025-05-26 13:06:34,072 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8aed506e5f1a.json not detected as batch file data
2025-05-26 13:06:34,074 SpawnPoolWorker-39 DEBUG    upload finished in 1.025512s, attributes: file_id=be5835095a0f
2025-05-26 13:06:34,074 SpawnPoolWorker-39 DEBUG    upload finished in 1.026021s, attributes: file_id=be5835095a0f
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:34,076 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:34,365 SpawnPoolWorker-36 DEBUG    upload finished in 0.828043s, attributes: file_id=3b8f6358e50b
2025-05-26 13:06:34,365 SpawnPoolWorker-36 DEBUG    upload finished in 0.828693s, attributes: file_id=3b8f6358e50b
upload:  51%|█████     | 541/1056 [00:59<00:48, 10.54it/s]2025-05-26 13:06:34,368 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/730956c16e1c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:34,476 SpawnPoolWorker-41 DEBUG    upload finished in 0.854778s, attributes: file_id=43bbc7c2574f
2025-05-26 13:06:34,477 SpawnPoolWorker-41 DEBUG    upload finished in 0.855342s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:34,603 SpawnPoolWorker-38 DEBUG    upload finished in 0.670724s, attributes: file_id=2fec7ed79594
2025-05-26 13:06:34,603 SpawnPoolWorker-38 DEBUG    upload finished in 0.671278s, attributes: file_id=2fec7ed79594
upload:  51%|█████▏    | 543/1056 [00:59<00:52,  9.70it/s]2025-05-26 13:06:34,605 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/517a099f57f0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:34,725 SpawnPoolWorker-40 DEBUG    upload finished in 0.991403s, attributes: file_id=37272a0b2b61
2025-05-26 13:06:34,725 SpawnPoolWorker-40 DEBUG    upload finished in 0.991901s, attributes: file_id=37272a0b2b61
2025-05-26 13:06:34

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:34,828 SpawnPoolWorker-39 DEBUG    upload finished in 0.751988s, attributes: file_id=b21b08c701f1
2025-05-26 13:06:34,828 SpawnPoolWorker-39 DEBUG    upload finished in 0.752511s, attributes: file_id=b21b08c701f1
upload:  52%|█████▏    | 545/1056 [00:59<00:54,  9.42it/s]2025-05-26 13:06:34,829 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e90dc8a5b8c5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:34,942 SpawnPoolWorker-37 DEBUG    upload finished in 0.871366s, attributes: file_id=8aed506e5f1a
2025-05-26 13:06:34,942 SpawnPoolWorker-37 DEBUG    upload finished in 0.871959s, attributes: file_id=8aed506e5f1a
upload:  52%|█████▏

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:35,031 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f8e70afdbe32.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:35,130 SpawnPoolWorker-35 DEBUG    upload finished in 0.873254s, attributes: file_id=30888762b692
2025-05-26 13:06:35,130 SpawnPoolWorker-35 DEBUG    upload finished in 0.873739s, attributes: file_id=30888762b692
upload:  52%|█████▏    | 548/1056 [01:00<00:52,  9.72it/s]2025-05-26 13:06:35,131 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/cf0b5353e31c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in th

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:35,306 SpawnPoolWorker-36 DEBUG    upload finished in 0.939205s, attributes: file_id=730956c16e1c
2025-05-26 13:06:35,306 SpawnPoolWorker-36 DEBUG    upload finished in 0.939679s, attributes: file_id=730956c16e1c
upload:  52%|█████▏    | 550/1056 [01:00<00:49, 10.21it/s]2025-05-26 13:06:35,308 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5cb75b78a9f6.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:35,375 SpawnPoolWorker-38 DEBUG    upload finished in 0.770522s, attributes: file_id=517a099f57f0
2025-05-26 13:06:35,375 SpawnPoolWorker-38 DEBUG    upload finished in 0.77099s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:35,536 SpawnPoolWorker-39 DEBUG    upload finished in 0.70711s, attributes: file_id=e90dc8a5b8c5
2025-05-26 13:06:35,536 SpawnPoolWorker-39 DEBUG    upload finished in 0.707578s, attributes: file_id=e90dc8a5b8c5
upload:  52%|█████▏    | 552/1056 [01:00<00:52,  9.66it/s]2025-05-26 13:06:35,539 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c641300cfa2d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:35,642 SpawnPoolWorker-40 DEBUG    upload finished in 0.915791s, attributes: file_id=2c49e36352ee
2025-05-26 13:06:35,643 SpawnPoolWorker-40 DEBUG    upload finished in 0.91623s, attributes: file_id=2c49e36352ee
upload:  52%|█████▏    | 553/1056 [01:00<00:52,  9.61it/s]2025-05-26 13:06

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:35,825 SpawnPoolWorker-37 DEBUG    upload finished in 0.881924s, attributes: file_id=4ae1530b6b3e
2025-05-26 13:06:35,826 SpawnPoolWorker-37 DEBUG    upload finished in 0.882529s, attributes: file_id=4ae1530b6b3e
upload:  52%|█████▏    | 554/1056 [01:00<01:00,  8.28it/s]2025-05-26 13:06:35,828 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a3df2537c6e4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:35,920 SpawnPoolWorker-34 DEBUG    upload finished in 0.889207s, attributes: file_id=f8e70afdbe32
2025-05-26 13:06:35,920 SpawnPoolWorker-34 DEBUG    upload finished in 0.889737s, attributes: file_id=f8e70afdbe32
2025-05-26 13:06:35

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:36,294 SpawnPoolWorker-36 DEBUG    upload finished in 0.986236s, attributes: file_id=5cb75b78a9f6
2025-05-26 13:06:36,294 SpawnPoolWorker-36 DEBUG    upload finished in 0.986716s, attributes: file_id=5cb75b78a9f6
upload:  53%|█████▎    | 557/1056 [01:01<01:13,  6.82it/s]2025-05-26 13:06:36,296 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/dce3452728c5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:36,317 SpawnPoolWorker-40 DEBUG    upload finished in 0.673548s, attributes: file_id=e76cc018e624
2025-05-26 13:06:36,318 SpawnPoolWorker-40 DEBUG    upload finished in 0.674221s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:36,574 SpawnPoolWorker-39 DEBUG    upload finished in 1.036569s, attributes: file_id=c641300cfa2d
2025-05-26 13:06:36,575 SpawnPoolWorker-39 DEBUG    upload finished in 1.037151s, attributes: file_id=c641300cfa2d
upload:  53%|█████▎    | 560/1056 [01:01<01:00,  8.18it/s]2025-05-26 13:06:36,577 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7b827ad753bc.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:36,867 SpawnPoolWorker-38 DEBUG    upload finished in 1.4912s, attributes: file_id=5d6aa07d2198
2025-05-26 13:06:36,868 SpawnPoolWorker-38 DEBUG    upload finished in 1.491641s, attributes: file_id=5d6aa07d2198
upload:  53%|█████▎    | 561/1056 [01:01<01:20,  6.18it/s]2025-05-26 13:06:36,869 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7604213daa85.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:36,973 SpawnPoolWorker-34 DEBUG    upload finished in 1.051841s, attributes: file_id=2ac8f99ee545
2025-05-26 13:06:36,974 SpawnPoolWorker-34 DEBUG    upload finished in 1.052388s, attributes: file_id=2ac8f99ee545
upload:  53%|█████▎    | 562/1056 [01:01<01:13,  6.76it/s]2025-05-26 13:06

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:37,089 SpawnPoolWorker-37 DEBUG    upload finished in 1.26195s, attributes: file_id=a3df2537c6e4
2025-05-26 13:06:37,090 SpawnPoolWorker-37 DEBUG    upload finished in 1.262565s, attributes: file_id=a3df2537c6e4
upload:  53%|█████▎    | 563/1056 [01:02<01:08,  7.17it/s]2025-05-26 13:06:37,092 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0be7ef37a526.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:37,155 SpawnPoolWorker-36 DEBUG    upload finished in 0.859704s, attributes: file_id=dce3452728c5
2025-05-26 13:06:37,155 SpawnPoolWorker-36 DEBUG    upload finished in 0.860133s, attributes: file_id=dce3452728c5
2025-05-26 13:06:37,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:37,325 SpawnPoolWorker-40 DEBUG    upload finished in 1.00585s, attributes: file_id=e75ddb935156
2025-05-26 13:06:37,325 SpawnPoolWorker-40 DEBUG    upload finished in 1.006281s, attributes: file_id=e75ddb935156
2025-05-26 13:06:37,328 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ab51e0dce31d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:37,459 SpawnPoolWorker-41 DEBUG    upload finished in 1.445251s, attributes: file_id=5ebbcc2d2e7b
2025-05-26 13:06:37,459 SpawnPoolWorker-41 DEBUG    upload finished in 1.445736s, attributes: file_id=5ebbcc2d2e7b
upload:  54%|█████▎    | 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:37,708 SpawnPoolWorker-39 DEBUG    upload finished in 1.131914s, attributes: file_id=7b827ad753bc
2025-05-26 13:06:37,708 SpawnPoolWorker-39 DEBUG    upload finished in 1.132393s, attributes: file_id=7b827ad753bc
upload:  54%|█████▍    | 568/1056 [01:02<01:08,  7.11it/s]2025-05-26 13:06:37,710 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3bb5ab93ad8f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:37,877 SpawnPoolWorker-34 DEBUG    upload finished in 0.90198s, attributes: file_id=f23114ccf808
2025-05-26 13:06:37,878 SpawnPoolWorker-34 DEBUG    upload finished in 0.902762s, attributes: file_id=f23114ccf808
upload:  54%|█████▍ 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:38,030 SpawnPoolWorker-37 DEBUG    upload finished in 0.938595s, attributes: file_id=0be7ef37a526
2025-05-26 13:06:38,030 SpawnPoolWorker-37 DEBUG    upload finished in 0.939185s, attributes: file_id=0be7ef37a526
upload:  54%|█████▍    | 570/1056 [01:03<01:12,  6.73it/s]2025-05-26 13:06:38,033 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ad8d835da2eb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:38,141 SpawnPoolWorker-38 DEBUG    upload finished in 1.27266s, attributes: file_id=7604213daa85
2025-05-26 13:06:38,142 Spawn

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:38,343 SpawnPoolWorker-41 DEBUG    upload finished in 0.882242s, attributes: file_id=2ad8f228b711
2025-05-26 13:06:38,343 SpawnPoolWorker-41 DEBUG    upload finished in 0.882958s, attributes: file_id=2ad8f228b711
upload:  54%|█████▍    | 573/1056 [01:03<00:59,  8.16it/s]2025-05-26 13:06:38,345 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/29e510ff7baa.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:38,443 SpawnPoolWorker-36 DEBUG    upload finished in 1.287035s, attributes: file_id=d17da98700a5
2025-05-26 13:06:38,443 SpawnPoolWorker-36 DEBUG    upload finished in 1.287536s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:38,663 SpawnPoolWorker-35 DEBUG    upload finished in 1.425933s, attributes: file_id=21b451bde75a
2025-05-26 13:06:38,663 SpawnPoolWorker-35 DEBUG    upload finished in 1.426397s, attributes: file_id=21b451bde75a
upload:  55%|█████▍    | 576/1056 [01:03<00:54,  8.75it/s]2025-05-26 13:06:38,665 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/45f6caa72711.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:38,738 SpawnPoolWorker-34 DEBUG    upload finished in 0.858166s, attributes: file_id=5c7fc356f859
2025-05-26 13:06:38,738 SpawnPoolWorker-34 DEBUG    upload finished in 0.858913s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:38,921 SpawnPoolWorker-37 DEBUG    upload finished in 0.889257s, attributes: file_id=ad8d835da2eb
2025-05-26 13:06:38,921 SpawnPoolWorker-37 DEBUG    upload finished in 0.889858s, attributes: file_id=ad8d835da2eb
2025-05-26 13:06:38,923 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/81039c414428.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:39,063 SpawnPoolWorker-40 DEBUG    upload finished in 0.837637s, attributes: file_id=ef057944d665
2025-05-26 13:06:39,063 SpawnPoolWorker-40 DEBUG    upload finished in 0.838159s, attr

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:39,177 SpawnPoolWorker-41 DEBUG    upload finished in 0.832139s, attributes: file_id=29e510ff7baa
2025-05-26 13:06:39,177 SpawnPoolWorker-41 DEBUG    upload finished in 0.832693s, attributes: file_id=29e510ff7baa
upload:  55%|█████▌    | 581/1056 [01:04<00:51,  9.20it/s]2025-05-26 13:06:39,179 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/26c83189fdda.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:39,303 SpawnPoolWorker-36 DEBUG    upload finished in 0.858391s, attributes: file_id=aff1e329c398
2025-05-26 13:06:39,303 SpawnPoolWorker-36 DEBUG    upload finished in 0.858973s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:39,426 SpawnPoolWorker-39 DEBUG    upload finished in 0.88543s, attributes: file_id=abe9d7488ac0
2025-05-26 13:06:39,426 SpawnPoolWorker-39 DEBUG    upload finished in 0.885994s, attributes: file_id=abe9d7488ac0
upload:  55%|█████▌    | 583/1056 [01:04<00:54,  8.70it/s]2025-05-26 13:06:39,428 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1d66f84c32db.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:39,502 SpawnPoolWorker-35 DEBUG    upload finished in 0.837507s, attributes: file_id=45f6caa72711
2025-05-26 13:06:39,502 SpawnPoolWorker-35 DEBUG    upload finished in 0.838091s, attributes: file_id=45f6caa72711
2025-05-26 13:06:39,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:39,696 SpawnPoolWorker-37 DEBUG    upload finished in 0.773477s, attributes: file_id=81039c414428
2025-05-26 13:06:39,696 SpawnPoolWorker-37 DEBUG    upload finished in 0.773974s, attributes: file_id=81039c414428
upload:  55%|█████▌    | 586/1056 [01:04<00:48,  9.78it/s]2025-05-26 13:06:39,698 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/56995d3f35db.json not detected as batch file data
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:39,830 SpawnPoolWorker-38 DEBUG    upload finished in 0.991483s, attributes: file_id=1603480cd4d0
2025-05-26 13:06:39,830 SpawnPoolWorker-38 DEBUG    upload finished in 0.992073s, attributes: file_id=1603480cd4d0
upload:  56%|█████▌    | 587/1056 [01:04<00:51,  9.09it/s]2025-05-26 13:06:39,832 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/i

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:39,961 SpawnPoolWorker-40 DEBUG    upload finished in 0.896592s, attributes: file_id=1c9488cdf35d
2025-05-26 13:06:39,961 SpawnPoolWorker-40 DEBUG    upload finished in 0.897081s, attributes: file_id=1c9488cdf35d
upload:  56%|█████▌    | 588/1056 [01:04<00:54,  8.66it/s]2025-05-26 13:06:39,963 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/66b1a5f1c7f6.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:40,042 SpawnPoolWorker-36 DEBUG    upload finished in 0.73697s, attributes: file_id=ec415775b8bf
2025-05-26 13:06:40,042 SpawnPoolWorker-36 DEBUG    upload finished in 0.737492s, attributes: file_id=ec415775b8bf
2025-05-26 13:06:40,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:40,231 SpawnPoolWorker-39 DEBUG    upload finished in 0.803963s, attributes: file_id=1d66f84c32db
2025-05-26 13:06:40,232 SpawnPoolWorker-39 DEBUG    upload finished in 0.804478s, attributes: file_id=1d66f84c32db
2025-05-26 13:06:40,233 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/99061dc78f2e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:40,320 SpawnPoolWorker-35 DEBUG    upload finished in 0.816217s, attributes: file_id=9ef512be7678
2025-05-26 13:06:40,320 SpawnPoolWorker-35 DEBUG    upload finished in 0.816876s, attributes: file_id=9ef512be7678
upload:  56%|█████▌    | 592/1056 [01:05<00:46, 10.08it/s]2025-05-26 13:06:40

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:40,516 SpawnPoolWorker-34 DEBUG    upload finished in 0.921051s, attributes: file_id=9d6ab5443f0c
2025-05-26 13:06:40,517 SpawnPoolWorker-34 DEBUG    upload finished in 0.921569s, attributes: file_id=9d6ab5443f0c
upload:  56%|█████▋    | 594/1056 [01:05<00:45, 10.12it/s]2025-05-26 13:06:40,518 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f1c4a3f0444f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:40,627 SpawnPoolWorker-38 DEBUG    upload finished in 0.795773s, attributes: file_id=b4c256ef5454
2025-05-26 13:06:40,627 SpawnPoolWorker-38 DEBUG    upload finished in 0.796315s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:40,722 SpawnPoolWorker-36 DEBUG    upload finished in 0.678301s, attributes: file_id=4710f67019c7
2025-05-26 13:06:40,722 SpawnPoolWorker-36 DEBUG    upload finished in 0.678812s, attributes: file_id=4710f67019c7
2025-05-26 13:06:40,724 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4ff2ec8d9664.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:40,871 SpawnPoolWorker-40 DEBUG    upload finished in 0.909017s, attributes: file_id=66b1a5f1c7f6
2025-05-26 13:06:40,872 SpawnPoolWorker-40 DEBUG    upload finished in 0.90951s, attributes: file_id=66b1a5f1c7f6
upload:  57%|█████▋    | 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:41,005 SpawnPoolWorker-39 DEBUG    upload finished in 0.77177s, attributes: file_id=99061dc78f2e
2025-05-26 13:06:41,005 SpawnPoolWorker-39 DEBUG    upload finished in 0.772231s, attributes: file_id=99061dc78f2e
upload:  57%|█████▋    | 598/1056 [01:05<00:52,  8.78it/s]2025-05-26 13:06:41,007 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e658e9febf53.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:41,055 SpawnPoolWorker-37 DEBUG    upload finished in 0.672726s, attributes: file_id=84583039794c
2025-05-26 13:06:41,055 SpawnPoolWorker-37 DEBUG    upload finished in 0.673119s, attributes: file_id=84583039794c
2025-05-26 13:06:41,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:41,231 SpawnPoolWorker-35 DEBUG    upload finished in 0.908901s, attributes: file_id=4cfddaa612de
2025-05-26 13:06:41,231 SpawnPoolWorker-35 DEBUG    upload finished in 0.909447s, attributes: file_id=4cfddaa612de
2025-05-26 13:06:41,234 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/dc99f261ab30.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:41,251 SpawnPoolWorker-38 DEBUG    upload finished in 0.622328s, attributes: file_id=431775a1b988
2025-05-26 13:06:41,251 SpawnPoolWorker-38 DEBUG    upload finished in 0.622791s, attributes: file_id=431775a1b988
upload:  57%|█████▋    | 602/1056 [01:06<00:37, 11.96it/s]2025-05-26 13:06:41

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:41,448 SpawnPoolWorker-40 DEBUG    upload finished in 0.575132s, attributes: file_id=b68523049763
2025-05-26 13:06:41,448 SpawnPoolWorker-40 DEBUG    upload finished in 0.575639s, attributes: file_id=b68523049763
upload:  57%|█████▋    | 604/1056 [01:06<00:40, 11.30it/s]2025-05-26 13:06:41,450 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bf6459598024.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:41,595 SpawnPoolWorker-36 DEBUG    upload finished in 0.872019s, attributes: file_id=4ff2ec8d9664
2025-05-26 13:06:41,596 SpawnPoolWorker-36 DEBUG    upload finished in 0.872574s, attributes: file_id=4ff2ec8d9664
2025-05-26 13:06:41

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:41,738 SpawnPoolWorker-39 DEBUG    upload finished in 0.732054s, attributes: file_id=e658e9febf53
2025-05-26 13:06:41,738 SpawnPoolWorker-39 DEBUG    upload finished in 0.732532s, attributes: file_id=e658e9febf53
upload:  57%|█████▋    | 606/1056 [01:06<00:47,  9.38it/s]2025-05-26 13:06:41,741 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/51d5a1505a5a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:41,910 SpawnPoolWorker-37 DEBUG    upload finished in 0.853437s, attributes: file_id=55980119847c
2025-05-26 13:06:41,910 SpawnPoolWorker-37 DEBUG    upload finished in 0.853997s, attributes: file_id=55980119847c
2025-05-26 13:06:41

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:42,006 SpawnPoolWorker-41 DEBUG    upload finished in 0.885747s, attributes: file_id=c55a62087e22
2025-05-26 13:06:42,006 SpawnPoolWorker-41 DEBUG    upload finished in 0.886282s, attributes: file_id=c55a62087e22
upload:  58%|█████▊    | 608/1056 [01:06<00:51,  8.68it/s]2025-05-26 13:06:42,008 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ce5011b9c799.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:42,178 SpawnPoolWorker-38 DEBUG    upload finished in 0.925882s, attributes: file_id=8ba353d17de5
2025-05-26 13:06:42,179 SpawnPoolWorker-38 DEBUG    upload finished in 0.926448s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:42,249 SpawnPoolWorker-35 DEBUG    upload finished in 1.016645s, attributes: file_id=dc99f261ab30
2025-05-26 13:06:42,250 SpawnPoolWorker-35 DEBUG    upload finished in 1.017214s, attributes: file_id=dc99f261ab30
2025-05-26 13:06:42,252 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b431fa3b0fd6.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:42,362 SpawnPoolWorker-34 DEBUG    upload finished in 1.016448s, attributes: file_id=82ecc6181566
2025-05-26 13:06:42,363 SpawnPoolWorker-34 DEBUG    upload finished in 1.016967s, attributes: file_id=82ecc6181566
upload:  58%|█████▊    | 611/1056 [01:07<00:50,  8.76it/s]Removed trailing se

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:42,457 SpawnPoolWorker-40 DEBUG    upload finished in 1.007307s, attributes: file_id=bf6459598024
2025-05-26 13:06:42,457 SpawnPoolWorker-40 DEBUG    upload finished in 1.00782s, attributes: file_id=bf6459598024
2025-05-26 13:06:42,459 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ab1838ddf95f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:42,583 SpawnPoolWorker-36 DEBUG    upload finished in 0.985906s, attributes: file_id=721d0a5f0f30
2025-05-26 13:06:42,583 SpawnPoolWorker-36 DEBUG    upload finished in 0.98648s, attributes: file_id=721d0a5f0f30
upload:  58%|█████▊    | 6

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:42,720 SpawnPoolWorker-39 DEBUG    upload finished in 0.980638s, attributes: file_id=51d5a1505a5a
2025-05-26 13:06:42,720 SpawnPoolWorker-39 DEBUG    upload finished in 0.981065s, attributes: file_id=51d5a1505a5a
upload:  58%|█████▊    | 614/1056 [01:07<00:51,  8.51it/s]2025-05-26 13:06:42,722 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e90958bf9294.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:42,834 SpawnPoolWorker-37 DEBUG    upload finished in 0.922548s, attributes: file_id=74bc29a34b39
2025-05-26 13:06:42,834 SpawnPoolWorker-37 DEBUG    upload finished in 0.923012s, attributes: file_id=74bc29a34b39
upload:  58%|█████▊

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:42,975 SpawnPoolWorker-38 DEBUG    upload finished in 0.795174s, attributes: file_id=32bd77d320da
2025-05-26 13:06:42,976 SpawnPoolWorker-38 DEBUG    upload finished in 0.795674s, attributes: file_id=32bd77d320da
upload:  58%|█████▊    | 617/1056 [01:07<00:43, 10.07it/s]2025-05-26 13:06:42,977 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/69329c893bb8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:43,104 SpawnPoolWorker-35 DEBUG    upload finished in 0.85333s, attributes: file_id=b431fa3b0fd6
2025-05-26 13:06:43,105 Spawn

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:43,215 SpawnPoolWorker-36 DEBUG    upload finished in 0.6306s, attributes: file_id=05433e9b76c6
2025-05-26 13:06:43,216 SpawnPoolWorker-36 DEBUG    upload finished in 0.631149s, attributes: file_id=05433e9b76c6
upload:  59%|█████▊    | 619/1056 [01:08<00:46,  9.39it/s]2025-05-26 13:06:43,218 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/189de956498a.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:43,317 SpawnPoolWorker-34 DEBUG    upload finished in 0.953072s, attributes: file_id=bc436f5f8c2d
2025-05-26 13:06:43,318 SpawnPoolWorker-34 DEBUG    upload finished in 0.953637s, attributes: file_id=bc436f5f8c2d
upload:  59%|█████▊    | 620/1056 [01:08<00:46,  9.47it/s]2025-05-26 13:06:43,320 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0f526aed1a44.json not detected as batch file data
A value is trying to be set on a copy of a slice fr

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:43,436 SpawnPoolWorker-37 DEBUG    upload finished in 0.601105s, attributes: file_id=a4e69696c68f
2025-05-26 13:06:43,437 SpawnPoolWorker-37 DEBUG    upload finished in 0.601619s, attributes: file_id=a4e69696c68f
upload:  59%|█████▉    | 621/1056 [01:08<00:47,  9.20it/s]2025-05-26 13:06:43,439 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/442cac5437ce.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:43,526 SpawnPoolWorker-39 DEBUG    upload finished in 0.804962s, attributes: file_id=e90958bf9294
2025-05-26 13:06:43,526 SpawnPoolWorker-39 DEBUG    upload finished in 0.805395s, attributes: file_id=e90958bf9294
2025-05-26 13:06:43

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:43,789 SpawnPoolWorker-41 DEBUG    upload finished in 0.875866s, attributes: file_id=a61c02c25fc8
2025-05-26 13:06:43,789 SpawnPoolWorker-41 DEBUG    upload finished in 0.876299s, attributes: file_id=a61c02c25fc8
upload:  59%|█████▉    | 624/1056 [01:08<00:50,  8.63it/s]2025-05-26 13:06:43,791 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ae57232e8c0b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:43,834 SpawnPoolWorker-36 DEBUG    upload finished in 0.617164s, attributes: file_id=189de956498a
2025-05-26 13:06:43,834 SpawnPoolWorker-36 DEBUG    upload finished in 0.617629s, attributes: file_id=189de956498a
2025-05-26 13:06:43

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:44,124 SpawnPoolWorker-35 DEBUG    upload finished in 1.018322s, attributes: file_id=2012cdb0438e
2025-05-26 13:06:44,124 SpawnPoolWorker-35 DEBUG    upload finished in 1.018838s, attributes: file_id=2012cdb0438e
upload:  59%|█████▉    | 627/1056 [01:09<00:50,  8.56it/s]2025-05-26 13:06:44,126 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/61455c2d980a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:44,228 SpawnPoolWorker-34 DEBUG    upload finished in 0.908651s, attributes: file_id=0f526aed1a44
2025-05-26 13:06:44,228 SpawnPoolWorker-34 DEBUG    upload finished in 0.909153s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:44,421 SpawnPoolWorker-37 DEBUG    upload finished in 0.982708s, attributes: file_id=442cac5437ce
2025-05-26 13:06:44,421 SpawnPoolWorker-37 DEBUG    upload finished in 0.98331s, attributes: file_id=442cac5437ce
upload:  60%|█████▉    | 630/1056 [01:09<00:45,  9.36it/s]2025-05-26 13:06:44,426 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8b34d0774bca.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:44,516 SpawnPoolWorker-40 DEBUG    upload finished in 0.884073s, attributes: file_id=1c6ad10ce4d1
2025-05-26 13:06:44,517 SpawnPoolWorker-40 DEBUG    upload finished in 0.884574s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:44,728 SpawnPoolWorker-36 DEBUG    upload finished in 0.892824s, attributes: file_id=4afd344c4a17
2025-05-26 13:06:44,729 SpawnPoolWorker-36 DEBUG    upload finished in 0.893279s, attributes: file_id=4afd344c4a17
2025-05-26 13:06:44,730 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/12c4c228fad2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:44,848 SpawnPoolWorker-38 DEBUG    upload finished in 0.880535s, attributes: file_id=93f93fb9c670
2025-05-26 13:06:44,848 SpawnPoolWorker-38 DEBUG    upload finished in 0.881222s, attributes: file_id=93f93fb9c670
upload:  60%|██████    |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:44,940 SpawnPoolWorker-35 DEBUG    upload finished in 0.813959s, attributes: file_id=61455c2d980a
2025-05-26 13:06:44,940 SpawnPoolWorker-35 DEBUG    upload finished in 0.814555s, attributes: file_id=61455c2d980a
2025-05-26 13:06:44,941 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/94571e4205e0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:45,040 SpawnPoolWorker-34 DEBUG    upload finished in 0.810974s, attributes: file_id=a89dc0344021
2025-05-26 13:06:45,040 SpawnPoolWorker-34 DEBUG    upload finished in 0.811455s, attributes: file_id=a89dc0344021
upload:  60%|██████    | 636/1056 [01:10<00:44,  9.54it/s]2025-05-26 13:06:45

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:45,189 SpawnPoolWorker-39 DEBUG    upload finished in 0.868426s, attributes: file_id=ad508066ba55
2025-05-26 13:06:45,190 SpawnPoolWorker-39 DEBUG    upload finished in 0.868923s, attributes: file_id=ad508066ba55
upload:  60%|██████    | 637/1056 [01:10<00:47,  8.83it/s]2025-05-26 13:06:45,191 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8071805ca6b9.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:45,267 SpawnPoolWorker-40 DEBUG    upload finished in 0.748824s, attributes: file_id=ba9b26ca250c
2025-05-26 13:06:45,267 SpawnPoolWorker-40 DEBUG    upload finished in 0.749281s, attributes: file_id=ba9b26ca250c
2025-05-26 13:06:45,268 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:45,514 SpawnPoolWorker-41 DEBUG    upload finished in 0.923753s, attributes: file_id=1111f4ceb69e
2025-05-26 13:06:45,515 SpawnPoolWorker-41 DEBUG    upload finished in 0.924241s, attributes: file_id=1111f4ceb69e
upload:  61%|██████    | 640/1056 [01:10<00:46,  8.93it/s]2025-05-26 13:06:45,516 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/cc840e2f852a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:45,758 SpawnPoolWorker-34 DEBUG    upload finished in 0.716718s, attributes: file_id=c515785624c2
2025-05-26 13:06:45,759 SpawnPoolWorker-34 DEBUG    upload finished in 0.717199s, attributes: file_id=c515785624c2
upload:  61%|██████    | 642/1056 [01:10<00:48,  8.58it/s]2025-05-26 13:06:45,761 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/524085e71231.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:45,921 SpawnPoolWorker-38 DEBUG    upload finished in 1.070763s, attributes: file_id=1f54ed593cbe
2025-05-26 13:06:45,921 SpawnPoolWorker-38 DEBUG    upload finished in 1.071201s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:46,032 SpawnPoolWorker-39 DEBUG    upload finished in 0.840741s, attributes: file_id=8071805ca6b9
2025-05-26 13:06:46,032 SpawnPoolWorker-39 DEBUG    upload finished in 0.841219s, attributes: file_id=8071805ca6b9
upload:  61%|██████    | 644/1056 [01:11<00:50,  8.09it/s]2025-05-26 13:06:46,033 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/66f0125736de.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:46,149 SpawnPoolWorker-35 DEBUG    upload finished in 1.20845s, attributes: file_id=94571e4205e0
2025-05-26 13:06:46,150 SpawnPoolWorker-35 DEBUG    upload finished in 1.208896s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:46,239 SpawnPoolWorker-37 DEBUG    upload finished in 0.848052s, attributes: file_id=7432f8a94e2d
2025-05-26 13:06:46,239 SpawnPoolWorker-37 DEBUG    upload finished in 0.848527s, attributes: file_id=7432f8a94e2d
2025-05-26 13:06:46,241 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8040f3edee8a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:46,319 SpawnPoolWorker-41 DEBUG    upload finished in 0.803014s, attributes: file_id=cc840e2f852a
2025-05-26 13:06:46,319 SpawnPoolWorker-41 DEBUG    upload finished in 0.803464s, attributes: file_id=cc840e2f852a
upload:  61%|██████▏   | 647/1056 [01:11<00:43,  9.47it/s]2025-05-26 13:06:46

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:46,445 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c1c40f053f7a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:46,551 SpawnPoolWorker-36 DEBUG    upload finished in 0.920705s, attributes: file_id=28e26c885c23
2025-05-26 13:06:46,551 SpawnPoolWorker-36 DEBUG    upload finished in 0.9213s, attributes: file_id=28e26c885c23
upload:  61%|██████▏   | 649/1056 [01:11<00:44,  9.13it/s]2025-05-26 13:06:46,554 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7f67c18a6a18.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:46,730 SpawnPoolWorker-39 DEBUG    upload finished in 0.697176s, attributes: file_id=66f0125736de
2025-05-26 13:06:46,730 SpawnPoolWorker-39 DEBUG    upload finished in 0.697599s, attributes: file_id=66f0125736de
upload:  62%|██████▏   | 651/1056 [01:11<00:40,  9.90it/s]2025-05-26 13:06:46,732 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bbc3b36ac8c3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:46,848 SpawnPoolWorker-34 DEBUG    upload finished in 1.088299s, attributes: file_id=524085e71231
2025-05-26 13:06:46,848 SpawnPoolWorker-34 DEBUG    upload finished in 1.088866s, attributes: file_id=524085e71231
upload:  62%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:47,002 SpawnPoolWorker-35 DEBUG    upload finished in 0.851446s, attributes: file_id=cd660b06d0a5
2025-05-26 13:06:47,003 SpawnPoolWorker-35 DEBUG    upload finished in 0.851912s, attributes: file_id=cd660b06d0a5
upload:  62%|██████▏   | 653/1056 [01:11<00:47,  8.53it/s]2025-05-26 13:06:47,004 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7a6cb7389a16.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:47,124 SpawnPoolWorker-41 DEBUG    upload finished in 0.803724s, attributes: file_id=f6c6bbf97eb6
2025-05-26 13:06:47,124 SpawnPoolWorker-41 DEBUG    upload finished in 0.804269s, attributes: file_id=f6c6bbf97eb6
upload:  62%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:47,234 SpawnPoolWorker-40 DEBUG    upload finished in 0.789214s, attributes: file_id=c1c40f053f7a
2025-05-26 13:06:47,234 SpawnPoolWorker-40 DEBUG    upload finished in 0.789671s, attributes: file_id=c1c40f053f7a
upload:  62%|██████▏   | 655/1056 [01:12<00:46,  8.62it/s]2025-05-26 13:06:47,236 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ac2c85afcb54.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:47,419 SpawnPoolWorker-37 DEBUG    upload finished in 1.178566s, attributes: file_id=8040f3edee8a
2025-05-26 13:06:47,419 SpawnPoolWorker-37 DEBUG    upload finished in 1.179125s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:47,595 SpawnPoolWorker-36 DEBUG    upload finished in 1.042261s, attributes: file_id=7f67c18a6a18
2025-05-26 13:06:47,595 SpawnPoolWorker-36 DEBUG    upload finished in 1.042988s, attributes: file_id=7f67c18a6a18
upload:  62%|██████▏   | 657/1056 [01:12<00:58,  6.80it/s]2025-05-26 13:06:47,598 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ef534e2d9dea.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:47,776 SpawnPoolWorker-39 DEBUG    upload finished in 1.044708s, attributes: file_id=bbc3b36ac8c3
2025-05-26 13:06:47,777 SpawnPoolWorker-39 DEBUG    upload finished in 1.045245s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:47,951 SpawnPoolWorker-38 DEBUG    upload finished in 1.325505s, attributes: file_id=c76699374a41
2025-05-26 13:06:47,952 SpawnPoolWorker-38 DEBUG    upload finished in 1.32636s, attributes: file_id=c76699374a41
upload:  62%|██████▏   | 659/1056 [01:12<01:04,  6.16it/s]2025-05-26 13:06:47,955 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3fa187299984.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:48,120 SpawnPoolWorker-41 DEBUG    upload finished in 0.994215s, attributes: file_id=09aff50321c6
2025-05-26 13:06:48,120 SpawnPoolWorker-41 DEBUG    upload finished in 0.994955s, attributes: file_id=09aff50321c6
upload:  62%|██████▎

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:48,228 SpawnPoolWorker-37 DEBUG    upload finished in 0.807744s, attributes: file_id=ca47f937a531
2025-05-26 13:06:48,229 SpawnPoolWorker-37 DEBUG    upload finished in 0.808375s, attributes: file_id=ca47f937a531
upload:  63%|██████▎   | 661/1056 [01:13<00:58,  6.77it/s]2025-05-26 13:06:48,231 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c35e1e475e81.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:48,417 SpawnPoolWorker-35 DEBUG    upload finished in 1.413582s, attributes: file_id=7a6cb7389a16
2025-05-26 13:06:48,418 SpawnPoolWorker-35 DEBUG    upload finished in 1.414097s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:48,581 SpawnPoolWorker-36 DEBUG    upload finished in 0.983581s, attributes: file_id=ef534e2d9dea
2025-05-26 13:06:48,582 SpawnPoolWorker-36 DEBUG    upload finished in 0.984454s, attributes: file_id=ef534e2d9dea
upload:  63%|██████▎   | 663/1056 [01:13<01:03,  6.20it/s]2025-05-26 13:06:48,585 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/13793f4c3f42.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:48,719 SpawnPoolWorker-39 DEBUG    upload finished in 0.941227s, attributes: file_id=ff1490b24bd3
2025-05-26 13:06:48,720 SpawnPoolWorker-39 DEBUG    upload finished in 0.941726s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:48,792 SpawnPoolWorker-38 DEBUG    upload finished in 0.837966s, attributes: file_id=3fa187299984
2025-05-26 13:06:48,792 SpawnPoolWorker-38 DEBUG    upload finished in 0.838533s, attributes: file_id=3fa187299984
2025-05-26 13:06:48,794 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5f55c73084f7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:48,958 SpawnPoolWorker-34 DEBUG    upload finished in 2.108126s, attributes: file_id=a274fb1b63aa
2025-05-26 13:06:48,958 SpawnPoolWorker-34 DEBUG    upload finished in 2.108675s, attributes: file_id=a274fb1b63aa
upload:  63%|██████▎   | 666/1056 [01:13<00:53,  7.24it/s]2025-05-26 13:06:48

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:49,056 SpawnPoolWorker-41 DEBUG    upload finished in 0.933308s, attributes: file_id=69381daa3222
2025-05-26 13:06:49,056 SpawnPoolWorker-41 DEBUG    upload finished in 0.933889s, attributes: file_id=69381daa3222
2025-05-26 13:06:49,058 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/768e1b81c5bd.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:49,136 SpawnPoolWorker-40 DEBUG    upload finished in 1.901433s, attributes: file_id=ac2c85afcb54
2025-05-26 13:06:49,137 SpawnPoolWorker-40 DEBUG    upload finished in 1.901974s, attributes: file_id=ac2c85afcb54
upload:  63%|██████▎   |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:49,226 SpawnPoolWorker-37 DEBUG    upload finished in 0.995766s, attributes: file_id=c35e1e475e81
2025-05-26 13:06:49,226 SpawnPoolWorker-37 DEBUG    upload finished in 0.996272s, attributes: file_id=c35e1e475e81
2025-05-26 13:06:49,228 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ed194552da58.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:49,355 SpawnPoolWorker-36 DEBUG    upload finished in 0.771386s, attributes: file_id=13793f4c3f42
2025-05-26 13:06:49,356 SpawnPoolWorker-36 DEBUG    upload finished in 0.77198s, attributes: file_id=13793f4c3f42
upload:  63%|██████▎   | 670/1056 [01:14<00:44,  8.67it/s]2025-05-26 13:06:49,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:49,615 SpawnPoolWorker-39 DEBUG    upload finished in 0.893816s, attributes: file_id=323c1fdfcf59
2025-05-26 13:06:49,615 SpawnPoolWorker-39 DEBUG    upload finished in 0.894306s, attributes: file_id=323c1fdfcf59
upload:  64%|██████▎   | 672/1056 [01:14<00:46,  8.23it/s]2025-05-26 13:06:49,618 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ba6b757a9645.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:49,724 SpawnPoolWorker-38 DEBUG    upload finished in 0.931044s, attributes: file_id=5f55c73084f7
2025-05-26 13:06:49,725 SpawnPoolWorker-38 DEBUG    upload finished in 0.931681s, attributes: file_id=5f55c73084f7
upload:  64%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:49,829 SpawnPoolWorker-41 DEBUG    upload finished in 0.772305s, attributes: file_id=768e1b81c5bd
2025-05-26 13:06:49,830 SpawnPoolWorker-41 DEBUG    upload finished in 0.772809s, attributes: file_id=768e1b81c5bd
upload:  64%|██████▍   | 674/1056 [01:14<00:43,  8.69it/s]2025-05-26 13:06:49,832 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/12fbe226b282.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:49,935 SpawnPoolWorker-34 DEBUG    upload finished in 0.976097s, attributes: file_id=215701687ebd
2025-05-26 13:06:49,936 SpawnPoolWorker-34 DEBUG    upload finished in 0.97668s, attributes: file_id=215701687ebd
upload:  64%|██████▍

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:50,062 SpawnPoolWorker-40 DEBUG    upload finished in 0.923289s, attributes: file_id=ec0771d845c0
2025-05-26 13:06:50,062 SpawnPoolWorker-40 DEBUG    upload finished in 0.923866s, attributes: file_id=ec0771d845c0
upload:  64%|██████▍   | 676/1056 [01:15<00:44,  8.59it/s]2025-05-26 13:06:50,064 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/692c48d38750.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:50,145 SpawnPoolWorker-37 DEBUG    upload finished in 0.91759s, attributes: file_id=ed194552da58
2025-05-26 13:06:50,145 SpawnPoolWorker-37 DEBUG    upload finished in 0.918112s, attributes: file_id=ed194552da58
2025-05-26 13:06:50,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:50,290 SpawnPoolWorker-36 DEBUG    upload finished in 0.933492s, attributes: file_id=aa5dcc70c822
2025-05-26 13:06:50,291 SpawnPoolWorker-36 DEBUG    upload finished in 0.933988s, attributes: file_id=aa5dcc70c822
upload:  64%|██████▍   | 678/1056 [01:15<00:43,  8.66it/s]2025-05-26 13:06:50,292 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7cceb2ff086c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:50,392 SpawnPoolWorker-35 DEBUG    upload finished in 0.914668s, attributes: file_id=e938bf9eafb4
2025-05-26 13:06:50,393 SpawnPoolWorker-35 DEBUG    upload finished in 0.915203s, attributes: file_id=e938bf9eafb4
upload:  64%|██████▍   | 679/1056 [01:15<00:42,  8.91it/s]2025-05-26 13:

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:50,525 SpawnPoolWorker-38 DEBUG    upload finished in 0.79845s, attributes: file_id=9d4fd7d355de
2025-05-26 13:06:50,525 SpawnPoolWorker-38 DEBUG    upload finished in 0.798997s, attributes: file_id=9d4fd7d355de
upload:  64%|██████▍   | 680/1056 [01:15<00:44,  8.52it/s]2025-05-26 13:06:50,527 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fda088c77d17.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:50,626 SpawnPoolWorker-41 DEBUG    upload finished in 0.795041s, attributes: file_id=12fbe226b282
2025-05-26 13:06:50,626 SpawnPoolWorker-41 DEBUG    upload finished in 0.795594s, attributes: file_id=12fbe226b282
upload:  64%|██████▍

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:50,739 SpawnPoolWorker-34 DEBUG    upload finished in 0.801944s, attributes: file_id=54a11f63b084
2025-05-26 13:06:50,739 SpawnPoolWorker-34 DEBUG    upload finished in 0.802528s, attributes: file_id=54a11f63b084
upload:  65%|██████▍   | 682/1056 [01:15<00:42,  8.84it/s]2025-05-26 13:06:50,742 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ccae9134b7fd.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:50,818 SpawnPoolWorker-37 DEBUG    upload finished in 0.67192s, attributes: file_id=28a2eb664c3e
2025-05-26 13:06:50,818 SpawnPoolWorker-37 DEBUG    upload finished in 0.672395s, attributes: file_id=28a2eb664c3e
2025-05-26 13:06:50,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:51,095 SpawnPoolWorker-40 DEBUG    upload finished in 1.032058s, attributes: file_id=692c48d38750
2025-05-26 13:06:51,096 SpawnPoolWorker-40 DEBUG    upload finished in 1.03255s, attributes: file_id=692c48d38750
upload:  65%|██████▍   | 685/1056 [01:16<00:44,  8.30it/s]2025-05-26 13:06:51,097 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4d325567140f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:51,168 SpawnPoolWorker-36 DEBUG    upload finished in 0.876277s, attributes: file_id=7cceb2ff086c
2025-05-26 13:06:51,168 SpawnPoolWorker-36 DEBUG    upload finished in 0.876788s, attributes: file_id=7cceb2ff086c
2025-05-26 13:06:51,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:51,391 SpawnPoolWorker-35 DEBUG    upload finished in 0.996773s, attributes: file_id=53b282a1313a
2025-05-26 13:06:51,391 SpawnPoolWorker-35 DEBUG    upload finished in 0.997432s, attributes: file_id=53b282a1313a
upload:  65%|██████▌   | 688/1056 [01:16<00:40,  9.11it/s]Removed trailing semicolon and whitespace from query
2025-05-26 13:06:51,393 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d03c9bd27c7a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:51,484 SpawnPoolWorker-41 DEBUG    upload finished in 0.85591s, attributes: file_id=29e0e2780ad3
2025-05-26 13:06:51,484 SpawnPoolWorker-41 DEBUG    upload finished in 0.856453s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:51,595 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/397446cc306b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:51,697 SpawnPoolWorker-37 DEBUG    upload finished in 0.877314s, attributes: file_id=c5f39657525c
2025-05-26 13:06:51,697 SpawnPoolWorker-37 DEBUG    upload finished in 0.877785s, attributes: file_id=c5f39657525c
upload:  65%|██████▌   | 691/1056 [01:16<00:38,  9.44it/s]2025-05-26 13:06:51,699 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/837ffd873211.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in th

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:51,860 SpawnPoolWorker-40 DEBUG    upload finished in 0.763209s, attributes: file_id=4d325567140f
2025-05-26 13:06:51,860 SpawnPoolWorker-40 DEBUG    upload finished in 0.763681s, attributes: file_id=4d325567140f
upload:  66%|██████▌   | 692/1056 [01:16<00:43,  8.37it/s]2025-05-26 13:06:51,862 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5bff9f970545.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:51,971 SpawnPoolWorker-39 DEBUG    upload finished in 1.050204s, attributes: file_id=2f091014e8d7
2025-05-26 13:06:51,971 SpawnPoolWorker-39 DEBUG    upload finished in 1.05063s, attributes: file_id=2f091014e8d7
upload:  66%|██████▌

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:52,132 SpawnPoolWorker-38 DEBUG    upload finished in 0.854636s, attributes: file_id=2afe03e157c3
2025-05-26 13:06:52,132 SpawnPoolWorker-38 DEBUG    upload finished in 0.855128s, attributes: file_id=2afe03e157c3
upload:  66%|██████▌   | 695/1056 [01:17<00:36,  9.83it/s]2025-05-26 13:06:52,136 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7231f5d9eda3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:52,294 SpawnPoolWorker-35 DEBUG    upload finished in 0.901089s, attributes: file_id=d03c9bd27c7a
2025-05-26 13:06:52,294 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:52,365 SpawnPoolWorker-41 DEBUG    upload finished in 0.879484s, attributes: file_id=e7556cf44261
2025-05-26 13:06:52,365 SpawnPoolWorker-41 DEBUG    upload finished in 0.87998s, attributes: file_id=e7556cf44261
2025-05-26 13:06:52,367 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/26aacae66562.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:52,475 SpawnPoolWorker-34 DEBUG    upload finished in 0.880776s, attributes: file_id=397446cc306b
2025-05-26 13:06:52,475 SpawnPoolWorker-34 DEBUG    upload finished in 0.881293s, attributes: file_id=397446cc306b
upload:  66%|██████▌   | 698/1056 [01:17<00:37,  9.46it/s]2025-05-26 13:06:52,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:52,664 SpawnPoolWorker-40 DEBUG    upload finished in 0.803007s, attributes: file_id=5bff9f970545
2025-05-26 13:06:52,665 SpawnPoolWorker-40 DEBUG    upload finished in 0.80354s, attributes: file_id=5bff9f970545
upload:  66%|██████▋   | 700/1056 [01:17<00:36,  9.83it/s]2025-05-26 13:06:52,666 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/80b71c877cd0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:52,737 SpawnPoolWorker-39 DEBUG    upload finished in 0.765011s, attributes: file_id=5915225e2b1e
2025-05-26 13:06:52,738 SpawnPoolWorker-39 DEBUG    upload finished in 0.765564s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:52,941 SpawnPoolWorker-38 DEBUG    upload finished in 0.808165s, attributes: file_id=7231f5d9eda3
2025-05-26 13:06:52,941 SpawnPoolWorker-38 DEBUG    upload finished in 0.808674s, attributes: file_id=7231f5d9eda3
2025-05-26 13:06:52,943 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/95a04c3dd59d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:53,016 SpawnPoolWorker-35 DEBUG    upload finished in 0.721129s, attributes: file_id=1f8ce65c54b5
2025-05-26 13:06:53,016 SpawnPoolWorker-35 DEBUG    upload finished in 0.721646s, attributes: file_id=1f8ce65c54b5
upload:  67%|██████▋   |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:53,175 SpawnPoolWorker-41 DEBUG    upload finished in 0.80936s, attributes: file_id=26aacae66562
2025-05-26 13:06:53,176 SpawnPoolWorker-41 DEBUG    upload finished in 0.80976s, attributes: file_id=26aacae66562
2025-05-26 13:06:53,177 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c630f32ed3a4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:53,274 SpawnPoolWorker-34 DEBUG    upload finished in 0.798019s, attributes: file_id=58388b65f043
2025-05-26 13:06:53,275 SpawnPoolWorker-34 DEBUG    upload finished in 0.798488s, attributes: file_id=58388b65f043
upload:  67%|██████▋   | 706/1056 [01:18<00:36,  9.54it/s]2025-05-26 13:06:53,276 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:53,478 SpawnPoolWorker-37 DEBUG    upload finished in 0.927171s, attributes: file_id=89dad04c13de
2025-05-26 13:06:53,478 SpawnPoolWorker-37 DEBUG    upload finished in 0.927755s, attributes: file_id=89dad04c13de
upload:  67%|██████▋   | 708/1056 [01:18<00:36,  9.62it/s]2025-05-26 13:06:53,480 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/74e692263842.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:53,576 SpawnPoolWorker-36 DEBUG    upload finished in 0.722623s, attributes: file_id=1ba6326b829a
2025-05-26 13:06:53,576 SpawnPoolWorker-36 DEBUG    upload finished in 0.723105s, attributes: file_id=1ba6326b829a
2025-05-26 13:06:53

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

2025-05-26 13:06:53,750 SpawnPoolWorker-35 DEBUG    upload finished in 0.732208s, attributes: file_id=9d0ddb7f22fd
2025-05-26 13:06:53,750 SpawnPoolWorker-35 DEBUG    upload finished in 0.732628s, attributes: file_id=9d0ddb7f22fd
2025-05-26 13:06:53,753 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/52c59047136a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:53,916 SpawnPoolWorker-39 DEBUG    upload finished in 1.176742s, attributes: file_id=ce95a75dc7c1
2025-05-26 13:06:53,916 SpawnPoolWorker-39 DEBUG    upload finished in 1.177247s, attributes: file_id=ce95a75dc7c1
upload:  67%|██████▋   | 712/1056 [01:18<00:37,  9.20it/s]2025-05-26 13:06:53

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:54,031 SpawnPoolWorker-34 DEBUG    upload finished in 0.755044s, attributes: file_id=3057c7d22745
2025-05-26 13:06:54,031 SpawnPoolWorker-34 DEBUG    upload finished in 0.755574s, attributes: file_id=3057c7d22745
upload:  68%|██████▊   | 713/1056 [01:19<00:37,  9.10it/s]2025-05-26 13:06:54,033 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d81c3daaf06e.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:54,156 SpawnPoolWorker-40 DEBUG    upload finished in 0.798344s, attributes: file_id=59e602562ecb
2025-05-26 13:06:54,156 SpawnPoolWorker-40 DEBUG    upload finished in 0.798901s, attributes: file_id=59e602562ecb
upload:  68%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:54,291 SpawnPoolWorker-41 DEBUG    upload finished in 1.11415s, attributes: file_id=c630f32ed3a4
2025-05-26 13:06:54,291 SpawnPoolWorker-41 DEBUG    upload finished in 1.114667s, attributes: file_id=c630f32ed3a4
upload:  68%|██████▊   | 715/1056 [01:19<00:40,  8.48it/s]2025-05-26 13:06:54,293 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2b78d99e26d3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:54,390 SpawnPoolWorker-37 DEBUG    upload finished in 0.910323s, attributes: file_id=74e692263842
2025-05-26 13:06:54,390 SpawnPoolWorker-37 DEBUG    upload finished in 0.910925s, attributes: file_id=74e692263842
2025-05-26 13:06:54,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:06:54,567 SpawnPoolWorker-36 DEBUG    upload finished in 0.989851s, attributes: file_id=61e9b333deaf
2025-05-26 13:06:54,568 SpawnPoolWorker-36 DEBUG    upload finished in 0.990354s, attributes: file_id=61e9b333deaf
2025-05-26 13:06:54,569 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fbb925993f57.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:54,651 SpawnPoolWorker-39 DEBUG    upload finished in 0.733236s, attributes: file_id=b0b12664fc31
2025-05-26 13:06:54,651 SpawnPoolWorker-39 DEBUG    upload finished in 0.73373s, attributes: file_id=b0b12664fc31
upload:  68%|██████▊   | 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:54,793 SpawnPoolWorker-35 DEBUG    upload finished in 1.041746s, attributes: file_id=52c59047136a
2025-05-26 13:06:54,793 SpawnPoolWorker-35 DEBUG    upload finished in 1.042216s, attributes: file_id=52c59047136a
upload:  68%|██████▊   | 720/1056 [01:19<00:36,  9.17it/s]2025-05-26 13:06:54,795 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7284e1de9bb8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:54,903 SpawnPoolWorker-34 DEBUG    upload finished in 0.870758s, attributes: file_id=d81c3daaf06e
2025-05-26 13:06:54,904 SpawnPoolWorker-34 DEBUG    upload finished in 0.871328s, attributes: file_id=d81c3daaf06e
upload:  68%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:55,033 SpawnPoolWorker-41 DEBUG    upload finished in 0.740274s, attributes: file_id=2b78d99e26d3
2025-05-26 13:06:55,033 SpawnPoolWorker-41 DEBUG    upload finished in 0.740879s, attributes: file_id=2b78d99e26d3
upload:  68%|██████▊   | 722/1056 [01:20<00:38,  8.75it/s]2025-05-26 13:06:55,036 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4ad992878e4d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:55,164 SpawnPoolWorker-40 DEBUG    upload finished in 1.005825s, attributes: file_id=a45daba4dace
2025-05-26 13:06:55,164 SpawnPoolWorker-40 DEBUG    upload finished in 1.006459s, attributes: file_id=a45daba4dace
upload:  68%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:55,282 SpawnPoolWorker-37 DEBUG    upload finished in 0.890149s, attributes: file_id=5ea248c6f940
2025-05-26 13:06:55,282 SpawnPoolWorker-37 DEBUG    upload finished in 0.890695s, attributes: file_id=5ea248c6f940
upload:  69%|██████▊   | 724/1056 [01:20<00:39,  8.45it/s]2025-05-26 13:06:55,285 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/115039929c28.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:55,376 SpawnPoolWorker-36 DEBUG    upload finished in 0.807272s, attributes: file_id=fbb925993f57
2025-05-26 13:06:55,376 SpawnPoolWorker-36 DEBUG    upload finished in 0.807802s, attributes: file_id=fbb925993f57
Removed trailing se

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:55,596 SpawnPoolWorker-38 DEBUG    upload finished in 1.116928s, attributes: file_id=9685822e72ac
2025-05-26 13:06:55,596 SpawnPoolWorker-38 DEBUG    upload finished in 1.117504s, attributes: file_id=9685822e72ac
upload:  69%|██████▉   | 727/1056 [01:20<00:36,  8.97it/s]2025-05-26 13:06:55,598 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/986f7d5d8ec7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-vers

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:55,859 SpawnPoolWorker-34 DEBUG    upload finished in 0.954404s, attributes: file_id=9fc49d523567
2025-05-26 13:06:55,860 SpawnPoolWorker-34 DEBUG    upload finished in 0.954907s, attributes: file_id=9fc49d523567
upload:  69%|██████▉   | 729/1056 [01:20<00:39,  8.30it/s]2025-05-26 13:06:55,862 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3b148abb26f3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:55,948 SpawnPoolWorker-37 DEBUG    upload finished in 0.66428s, attributes: file_id=115039929c28
2025-05-26 13:06:55,948 SpawnPoolWorker-37 DEBUG    upload finished in 0.664766s, attributes: file_id=115039929c28
2025-05-26 13:06:55,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:56,109 SpawnPoolWorker-40 DEBUG    upload finished in 0.943679s, attributes: file_id=986b355da2bf
2025-05-26 13:06:56,110 SpawnPoolWorker-40 DEBUG    upload finished in 0.944152s, attributes: file_id=986b355da2bf
upload:  69%|██████▉   | 731/1056 [01:21<00:39,  8.17it/s]2025-05-26 13:06:56,112 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/37af029ecc77.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:56,188 SpawnPoolWorker-39 DEBUG    upload finished in 0.706302s, attributes: file_id=05d28d68a728
2025-05-26 13:06:56,188 SpawnPoolWorker-39 DEBUG    upload finished in 0.706765s, attributes: file_id=05d28d68a728
2025-05-26 13:06:56

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:56,337 SpawnPoolWorker-38 DEBUG    upload finished in 0.739297s, attributes: file_id=986f7d5d8ec7
2025-05-26 13:06:56,337 SpawnPoolWorker-38 DEBUG    upload finished in 0.739844s, attributes: file_id=986f7d5d8ec7
2025-05-26 13:06:56,339 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/032ab8a11c7c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:56,446 SpawnPoolWorker-35 DEBUG    upload finished in 0.716914s, attributes: file_id=e9bec0f15f18
2025-05-26 13:06:56,447 SpawnPoolWorker-35 DEBUG    upload finished in 0.717493s, attributes: file_id=e9bec0f15f18
upload:  70%|██████▉   |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:56,579 SpawnPoolWorker-41 DEBUG    upload finished in 1.544143s, attributes: file_id=4ad992878e4d
2025-05-26 13:06:56,579 SpawnPoolWorker-41 DEBUG    upload finished in 1.544653s, attributes: file_id=4ad992878e4d
2025-05-26 13:06:56,581 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/16127e212826.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:56,697 SpawnPoolWorker-34 DEBUG    upload finished in 0.835955s, attributes: file_id=3b148abb26f3
2025-05-26 13:06:56,697 SpawnPoolWorker-34 DEBUG    upload finished in 0.836493s, attributes: file_id=3b148abb26f3
upload:  70%|██████▉   | 737/1056 [01:21<00:35,  9.09it/s]2025-05-26 13:06:56

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:56,793 SpawnPoolWorker-37 DEBUG    upload finished in 0.843912s, attributes: file_id=a307861850d2
2025-05-26 13:06:56,793 SpawnPoolWorker-37 DEBUG    upload finished in 0.844372s, attributes: file_id=a307861850d2
2025-05-26 13:06:56,795 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b34ed036d93a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:56,937 SpawnPoolWorker-39 DEBUG    upload finished in 0.748068s, attributes: file_id=07ebccce8756
2025-05-26 13:06:56,938 SpawnPoolWorker-39 DEBUG    upload finished in 0.748576s, attributes: file_id=07ebccce8756
upload:  70%|██████▉   |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:57,053 SpawnPoolWorker-40 DEBUG    upload finished in 0.942696s, attributes: file_id=37af029ecc77
2025-05-26 13:06:57,054 SpawnPoolWorker-40 DEBUG    upload finished in 0.943207s, attributes: file_id=37af029ecc77
upload:  70%|███████   | 740/1056 [01:22<00:35,  8.78it/s]2025-05-26 13:06:57,056 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/cb1161a3156e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:57,150 SpawnPoolWorker-36 DEBUG    upload finished in 0.925163s, attributes: file_id=16765ae985f6
2025-05-26 13:06:57,150 SpawnPoolWorker-36 DEBUG    upload finished in 0.925613s, attributes: file_id=16765ae985f6
2025-05-26 13:06:57

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:57,374 SpawnPoolWorker-41 DEBUG    upload finished in 0.79432s, attributes: file_id=16127e212826
2025-05-26 13:06:57,375 SpawnPoolWorker-41 DEBUG    upload finished in 0.794757s, attributes: file_id=16127e212826
upload:  70%|███████   | 743/1056 [01:22<00:34,  8.98it/s]2025-05-26 13:06:57,376 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0a5b4f654033.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:57,494 SpawnPoolWorker-34 DEBUG    upload finished in 0.795367s, attributes: file_id=dcd3d342667c
2025-05-26 13:06:57,494 SpawnPoolWorker-34 DEBUG    upload finished in 0.795799s, attributes: file_id=dcd3d342667c
upload:  70%|███████   | 744/1056 [01:22<00:35,  8.84it/s]2025-05-26 13:06:57,496 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/084057ae8b52.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:57,601 SpawnPoolWorker-35 DEBUG    upload finished in 1.152834s, attributes: file_id=d267bd405013
2025-05-26 13:06:57,601 SpawnPoolWorker-35 DEBUG    upload finished in 1.153352s, attributes: file_id=d267bd405013
upload:  71%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:57,741 SpawnPoolWorker-37 DEBUG    upload finished in 0.945865s, attributes: file_id=b34ed036d93a
2025-05-26 13:06:57,741 SpawnPoolWorker-37 DEBUG    upload finished in 0.946336s, attributes: file_id=b34ed036d93a
upload:  71%|███████   | 746/1056 [01:22<00:36,  8.41it/s]2025-05-26 13:06:57,742 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/69419522689e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:57,862 SpawnPoolWorker-39 DEBUG    upload finished in 0.923393s, attributes: file_id=ff0dd8a407d4
2025-05-26 13:06:57,863 SpawnPoolWorker-39 DEBUG    upload finished in 0.923881s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:57,993 SpawnPoolWorker-38 DEBUG    upload finished in 0.737576s, attributes: file_id=1713d160caa0
2025-05-26 13:06:57,993 SpawnPoolWorker-38 DEBUG    upload finished in 0.738075s, attributes: file_id=1713d160caa0
upload:  71%|███████   | 748/1056 [01:22<00:37,  8.15it/s]2025-05-26 13:06:57,995 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/27d93086bf59.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:58,099 SpawnPoolWorker-40 DEBUG    upload finished in 1.044421s, attributes: file_id=cb1161a3156e
2025-05-26 13:06:58,100 SpawnPoolWorker-40 DEBUG    upload finished in 1.045015s, attributes: file_id=cb1161a3156e
upload:  71%|███████   | 749/1056 [01:23<00:36,  8.47it/s]2025-05-26 13:

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:58,233 SpawnPoolWorker-41 DEBUG    upload finished in 0.857417s, attributes: file_id=0a5b4f654033
2025-05-26 13:06:58,234 SpawnPoolWorker-41 DEBUG    upload finished in 0.857934s, attributes: file_id=0a5b4f654033
upload:  71%|███████   | 750/1056 [01:23<00:37,  8.15it/s]2025-05-26 13:06:58,235 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9d24694ae898.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:58,518 SpawnPoolWorker-34 DEBUG    upload finished in 1.023068s, attributes: file_id=084057ae8b52
2025-05-26 13:06:58,518 SpawnPoolWorker-34 DEBUG    upload finished in 1.023519s, attributes: file_id=084057ae8b52
upload:  71%|███████   | 751/1056 [01:23<00:51,  5.89it/s]2025-05-26 13:06:58,520 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/65c217cbdef1.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:58,661 SpawnPoolWorker-36 DEBUG    upload finished in 1.510191s, attributes: file_id=c8e3bf49a17d
2025-05-26 13:06:58,662 SpawnPoolWorker-36 DEBUG    upload finished in 1.510699s, attributes: file_id=c8e3bf49a17d
upload:  71%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:58,805 SpawnPoolWorker-37 DEBUG    upload finished in 1.062595s, attributes: file_id=69419522689e
2025-05-26 13:06:58,805 SpawnPoolWorker-37 DEBUG    upload finished in 1.063077s, attributes: file_id=69419522689e
upload:  71%|███████▏  | 753/1056 [01:23<00:47,  6.39it/s]2025-05-26 13:06:58,807 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6c9783a8d2c5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:59,029 SpawnPoolWorker-39 DEBUG    upload finished in 1.165407s, attributes: file_id=9f78adaef5b1
2025-05-26 13:06:59,030 SpawnPoolWorker-39 DEBUG    upload finished in 1.166169s, attributes: file_id=9f78adaef5b1
upload:  71%|███████▏  | 754/1056 [01:24<00:53,  5.65it/s]2025-05-26 13:06:59,033 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fa2b12bd9cbc.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:59,145 SpawnPoolWorker-40 DEBUG    upload finished in 1.044175s, attributes: file_id=01a14730d0fc
2025-05-26 13:06:59,146 SpawnPoolWorker-40 DEBUG    upload finished in 1.044643s, attributes: file_id=01a14730d0fc
upload:  71%|███████▏  | 755/1056 [01:24<00:47,  6.30it/s]2025-05-26 13:

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:06:59,297 SpawnPoolWorker-35 DEBUG    upload finished in 1.694379s, attributes: file_id=8cc34ffd2ce9
2025-05-26 13:06:59,298 SpawnPoolWorker-35 DEBUG    upload finished in 1.695178s, attributes: file_id=8cc34ffd2ce9
upload:  72%|███████▏  | 756/1056 [01:24<00:47,  6.38it/s]2025-05-26 13:06:59,301 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5fb71abe2003.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:06:59,391 SpawnPoolWorker-38 DEBUG    upload finished in 1.39685s, attributes: file_id=27d93086bf59
2025-05-26 13:06:59,392 SpawnPoolWorker-38 DEBUG    upload finished in 1.397321s, attributes: file_id=27d93086bf59
2025-05-26 13:06:59,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:59,534 SpawnPoolWorker-41 DEBUG    upload finished in 1.299125s, attributes: file_id=9d24694ae898
2025-05-26 13:06:59,534 SpawnPoolWorker-41 DEBUG    upload finished in 1.299637s, attributes: file_id=9d24694ae898
upload:  72%|███████▏  | 758/1056 [01:24<00:41,  7.19it/s]2025-05-26 13:06:59,536 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5451e1ba3236.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:59,671 SpawnPoolWorker-37 DEBUG    upload finished in 0.864605s, attributes: file_id=6c9783a8d2c5
2025-05-26 13:06:59,671 SpawnPoolWorker-37 DEBUG    upload finished in 0.865014s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:06:59,791 SpawnPoolWorker-36 DEBUG    upload finished in 1.127995s, attributes: file_id=90877ff19593
2025-05-26 13:06:59,791 SpawnPoolWorker-36 DEBUG    upload finished in 1.128524s, attributes: file_id=90877ff19593
upload:  72%|███████▏  | 760/1056 [01:24<00:39,  7.48it/s]Removed trailing semicolon and whitespace from query
2025-05-26 13:06:59,793 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8bbf049b542e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:06:59,931 SpawnPoolWorker-34 DEBUG    upload finished in 1.411222s, attributes: file_id=65c217cbdef1
2025-05-26 13:06:59,931 SpawnPoolWorker-34 DEBUG    upload finished in 1.411673s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:00,021 SpawnPoolWorker-35 DEBUG    upload finished in 0.721464s, attributes: file_id=5fb71abe2003
2025-05-26 13:07:00,022 SpawnPoolWorker-35 DEBUG    upload finished in 0.722505s, attributes: file_id=5fb71abe2003
2025-05-26 13:07:00,024 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9fb9ed894fa3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:00,170 SpawnPoolWorker-39 DEBUG    upload finished in 1.137636s, attributes: file_id=fa2b12bd9cbc
2025-05-26 13:07:00,171 SpawnPoolWorker-39 DEBUG    upload finished in 1.138501s, attributes: file_id=fa2b12bd9cbc
upload:  72%|███████▏  | 763/1056 [01:25<00:37,  7.78it/s]2025-05-26 13:07:00

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:00,305 SpawnPoolWorker-41 DEBUG    upload finished in 0.769378s, attributes: file_id=5451e1ba3236
2025-05-26 13:07:00,305 SpawnPoolWorker-41 DEBUG    upload finished in 0.769991s, attributes: file_id=5451e1ba3236
upload:  72%|███████▏  | 764/1056 [01:25<00:37,  7.69it/s]2025-05-26 13:07:00,308 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2a0b1657b490.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:00,401 SpawnPoolWorker-38 DEBUG    upload finished in 1.008472s, attributes: file_id=7a7cd88348fc
2025-05-26 13:07:00,402 SpawnPoolWorker-38 DEBUG    upload finished in 1.008988s, attributes: file_id=7a7cd88348fc
2025-05-26 13:07:00

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:00,517 SpawnPoolWorker-36 DEBUG    upload finished in 0.7248s, attributes: file_id=8bbf049b542e
2025-05-26 13:07:00,518 SpawnPoolWorker-36 DEBUG    upload finished in 0.725553s, attributes: file_id=8bbf049b542e
upload:  73%|███████▎  | 766/1056 [01:25<00:34,  8.30it/s]2025-05-26 13:07:00,522 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/78189fca4004.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:00,622 SpawnPoolWorker-40 DEBUG    upload finished in 1.475182s, attributes: file_id=42bc03ea34f7
2025-05-26 13:07:00,622 SpawnPoolWorker-40 DEBUG    upload finished in 1.475655s, attributes: file_id=42bc03ea34f7
upload:  73%|███████▎

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:00,742 SpawnPoolWorker-37 DEBUG    upload finished in 1.070332s, attributes: file_id=4461abda905c
2025-05-26 13:07:00,743 SpawnPoolWorker-37 DEBUG    upload finished in 1.070763s, attributes: file_id=4461abda905c
upload:  73%|███████▎  | 768/1056 [01:25<00:33,  8.49it/s]2025-05-26 13:07:00,744 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/730e4d215721.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:00,836 SpawnPoolWorker-35 DEBUG    upload finished in 0.812374s, attributes: file_id=9fb9ed894fa3
2025-05-26 13:07:00,836 SpawnPoolWorker-35 DEBUG    upload finished in 0.812913s, attributes: file_id=9fb9ed894fa3
2025-05-26 13:07:00

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:00,948 SpawnPoolWorker-34 DEBUG    upload finished in 1.015443s, attributes: file_id=86d23bcc8498
2025-05-26 13:07:00,948 SpawnPoolWorker-34 DEBUG    upload finished in 1.016004s, attributes: file_id=86d23bcc8498
upload:  73%|███████▎  | 770/1056 [01:25<00:31,  8.96it/s]2025-05-26 13:07:00,950 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8d9b1a142a34.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:01,074 SpawnPoolWorker-39 DEBUG    upload finished in 0.9022s, attributes: file_id=cc44398065d2
2025-05-26 13:07:01,074 SpawnPoolWorker-39 DEBUG    upload finished in 0.902661s, a

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:01,218 SpawnPoolWorker-41 DEBUG    upload finished in 0.911426s, attributes: file_id=2a0b1657b490
2025-05-26 13:07:01,219 SpawnPoolWorker-41 DEBUG    upload finished in 0.912025s, attributes: file_id=2a0b1657b490
upload:  73%|███████▎  | 772/1056 [01:26<00:34,  8.19it/s]2025-05-26 13:07:01,221 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6c9ffe0b988f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:01,338 SpawnPoolWorker-38 DEBUG    upload finished in 0.934798s, attributes: file_id=a01f088658de
2025-05-26 13:07:01,338 SpawnPoolWorker-38 DEBUG    upload finished in 0.935333s, attributes: file_id=a01f088658de
upload:  73%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:01,478 SpawnPoolWorker-36 DEBUG    upload finished in 0.958007s, attributes: file_id=78189fca4004
2025-05-26 13:07:01,479 SpawnPoolWorker-36 DEBUG    upload finished in 0.958757s, attributes: file_id=78189fca4004
upload:  73%|███████▎  | 774/1056 [01:26<00:35,  7.90it/s]2025-05-26 13:07:01,481 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4af1b3ff6247.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:01,579 SpawnPoolWorker-37 DEBUG    upload finished in 0.835419s, attributes: file_id=730e4d215721
2025-05-26 13:07:01,579 SpawnPoolWorker-37 DEBUG    upload finished in 0.835886s, attributes: file_id=730e4d215721
upload:  73%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:01,692 SpawnPoolWorker-35 DEBUG    upload finished in 0.85498s, attributes: file_id=b471ab186e84
2025-05-26 13:07:01,693 SpawnPoolWorker-35 DEBUG    upload finished in 0.85558s, attributes: file_id=b471ab186e84
upload:  73%|███████▎  | 776/1056 [01:26<00:32,  8.50it/s]2025-05-26 13:07:01,695 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a2b747f02cff.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:01,971 SpawnPoolWorker-40 DEBUG    upload finished in 1.347682s, attributes: file_id=90169299f675
2025-05-26 13:07:01,971 SpawnPoolWorker-40 DEBUG    upload finished in 1.34821s, attributes: file_id=90169299f675
upload:  74%|███████▎  | 777/1056 [01:26<00:45,  6.09it/s]2025-05-26 13:07:01,974 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1ea7a1179d95.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:02,070 SpawnPoolWorker-39 DEBUG    upload finished in 0.994233s, attributes: file_id=03c32c10a58d
2025-05-26 13:07:02,070 SpawnPoolWorker-39 DEBUG    upload finished in 0.994691s, attributes: file_id=03c32c10a58d
2025-05-26 13:07:02,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:02,249 SpawnPoolWorker-34 DEBUG    upload finished in 1.298934s, attributes: file_id=8d9b1a142a34
2025-05-26 13:07:02,249 SpawnPoolWorker-34 DEBUG    upload finished in 1.299475s, attributes: file_id=8d9b1a142a34
upload:  74%|███████▍  | 779/1056 [01:27<00:42,  6.55it/s]2025-05-26 13:07:02,250 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c92489bb1018.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:02,367 SpawnPoolWorker-41 DEBUG    upload finished in 1.147124s, attributes: file_id=6c9ffe0b988f
2025-05-26 13:07:02,367 SpawnPoolWorker-41 DEBUG    upload finished in 1.147578s, attributes: file_id=6c9ffe0b988f
upload:  74%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:02,489 SpawnPoolWorker-37 DEBUG    upload finished in 0.908423s, attributes: file_id=198cf2d90161
2025-05-26 13:07:02,489 SpawnPoolWorker-37 DEBUG    upload finished in 0.90888s, attributes: file_id=198cf2d90161
upload:  74%|███████▍  | 781/1056 [01:27<00:38,  7.22it/s]2025-05-26 13:07:02,491 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3f17feed24ac.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:02,600 SpawnPoolWorker-38 DEBUG    upload finished in 1.260525s, attributes: file_id=4ba829eaf35b
2025-05-26 13:07:02,600 SpawnPoolWorker-38 DEBUG    upload finished in 1.261005s, attributes: file_id=4ba829eaf35b
upload:  74%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:02,734 SpawnPoolWorker-36 DEBUG    upload finished in 1.253997s, attributes: file_id=4af1b3ff6247
2025-05-26 13:07:02,734 SpawnPoolWorker-36 DEBUG    upload finished in 1.254637s, attributes: file_id=4af1b3ff6247
upload:  74%|███████▍  | 783/1056 [01:27<00:36,  7.58it/s]2025-05-26 13:07:02,736 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c840eb7d53d8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:02,820 SpawnPoolWorker-39 DEBUG    upload finished in 0.748627s, attributes: file_id=d0039da21aa1
2025-05-26 13:07:02,820 SpawnPoolWorker-39 DEBUG    upload finished in 0.749095s, attributes: file_id=d0039da21aa1
2025-05-26 13:07:02,822 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:02,995 SpawnPoolWorker-34 DEBUG    upload finished in 0.744615s, attributes: file_id=c92489bb1018
2025-05-26 13:07:02,995 SpawnPoolWorker-34 DEBUG    upload finished in 0.745111s, attributes: file_id=c92489bb1018
upload:  74%|███████▍  | 785/1056 [01:27<00:35,  7.62it/s]2025-05-26 13:07:02,997 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bbf468830b38.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:03,066 SpawnPoolWorker-41 DEBUG    upload finished in 0.69743s, attributes: file_id=6b8d2097201f
2025-05-26 13:07:03,066 SpawnPoolWorker-41 DEBUG    upload finished in 0.697916s, attributes: file_id=6b8d2097201f
2025-05-26 13:07:03,067 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:07:03,272 SpawnPoolWorker-37 DEBUG    upload finished in 0.781846s, attributes: file_id=3f17feed24ac
2025-05-26 13:07:03,272 SpawnPoolWorker-37 DEBUG    upload finished in 0.782316s, attributes: file_id=3f17feed24ac
2025-05-26 13:07:03,274 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1d0b61e2d2df.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:03,398 SpawnPoolWorker-35 DEBUG    upload finished in 1.703791s, attributes: file_id=a2b747f02cff
2025-05-26 13:07:03,398 SpawnPoolWorker-35 DEBUG    upload finished in 1.704403s, attributes: file_id=a2b747f02cff
upload:  75%|███████▍  |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:03,521 SpawnPoolWorker-36 DEBUG    upload finished in 0.78532s, attributes: file_id=c840eb7d53d8
2025-05-26 13:07:03,521 SpawnPoolWorker-36 DEBUG    upload finished in 0.785782s, attributes: file_id=c840eb7d53d8
upload:  75%|███████▍  | 790/1056 [01:28<00:30,  8.65it/s]2025-05-26 13:07:03,523 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b53a1074a950.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:03,633 SpawnPoolWorker-38 DEBUG    upload finished in 1.031448s, attributes: file_id=44112b5c58ee
2025-05-26 13:07:03,633 SpawnPoolWorker-38 DEBUG    upload finished in 1.031871s, attributes: file_id=44112b5c58ee
upload:  75%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:03,745 SpawnPoolWorker-39 DEBUG    upload finished in 0.92377s, attributes: file_id=cd5c108a236c
2025-05-26 13:07:03,745 SpawnPoolWorker-39 DEBUG    upload finished in 0.924256s, attributes: file_id=cd5c108a236c
upload:  75%|███████▌  | 792/1056 [01:28<00:30,  8.76it/s]2025-05-26 13:07:03,747 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ce9ca9ce462c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:03,864 SpawnPoolWorker-34 DEBUG    upload finished in 0.867883s, attributes: file_id=bbf468830b38
2025-05-26 13:07:03,865 SpawnPoolWorker-34 DEBUG    upload finished in 0.868368s, attributes: file_id=bbf468830b38
upload:  75%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:04,096 SpawnPoolWorker-41 DEBUG    upload finished in 1.028965s, attributes: file_id=dae7750f0fa0
2025-05-26 13:07:04,096 SpawnPoolWorker-41 DEBUG    upload finished in 1.029476s, attributes: file_id=dae7750f0fa0
upload:  75%|███████▌  | 794/1056 [01:29<00:38,  6.81it/s]2025-05-26 13:07:04,098 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b5a3e3abfdbb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:04,174 SpawnPoolWorker-35 DEBUG    upload finished in 0.774325s, attributes: file_id=33ebfd45f56e
2025-05-26 13:07:04,174 SpawnPoolWorker-35 DEBUG    upload finished in 0.774851s, attributes: file_id=33ebfd45f56e
2025-05-26 13:07:04

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:04,302 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/50cb06238bc2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:04,430 SpawnPoolWorker-40 DEBUG    upload finished in 1.250299s, attributes: file_id=888fb21ca63a
2025-05-26 13:07:04,430 SpawnPoolWorker-40 DEBUG    upload finished in 1.250674s, attributes: file_id=888fb21ca63a
upload:  75%|███████▌  | 797/1056 [01:29<00:33,  7.82it/s]2025-05-26 13:07:04,431 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bcb99853ac0d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in th

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:04,592 SpawnPoolWorker-38 DEBUG    upload finished in 0.958425s, attributes: file_id=ac4e76da28b5
2025-05-26 13:07:04,593 SpawnPoolWorker-38 DEBUG    upload finished in 0.95886s, attributes: file_id=ac4e76da28b5
upload:  76%|███████▌  | 798/1056 [01:29<00:35,  7.32it/s]2025-05-26 13:07:04,595 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/939fefd52114.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:04,694 SpawnPoolWorker-36 DEBUG    upload finished in 1.171391s, attributes: file_id=b53a1074a950
2025-05-26 13:07:04,694 SpawnPoolWorker-36 DEBUG    upload finished in 1.171923s, attributes: file_id=b53a1074a950
upload:  76%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:04,849 SpawnPoolWorker-39 DEBUG    upload finished in 1.102328s, attributes: file_id=ce9ca9ce462c
2025-05-26 13:07:04,849 SpawnPoolWorker-39 DEBUG    upload finished in 1.102739s, attributes: file_id=ce9ca9ce462c
upload:  76%|███████▌  | 800/1056 [01:29<00:34,  7.41it/s]2025-05-26 13:07:04,851 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/15bef4419202.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:04,930 SpawnPoolWorker-41 DEBUG    upload finished in 0.83249s, attributes: file_id=b5a3e3abfdbb
2025-05-26 13:07:04,931 SpawnPoolWorker-41 DEBUG    upload finished in 0.833043s, attributes: file_id=b5a3e3abfdbb
2025-05-26 13:07:04,933 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
2025-05-26 13:07:05,143 SpawnPoolWorker-37 DEBUG    upload finished in 0.840857s, attributes: file_id=50cb06238bc2
2025-05-26 13:07:05,143 SpawnPoolWorker-37 DEBUG    upload finished in 0.841455s, attributes: file_id=50cb06238bc2
upload:  76%|███████▌  | 803/1056 [01:30<00:29,  8.71it/s]2025-05-26 13:07:05,146 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/08e3e82cebbe.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:05,232 SpawnPoolWorker-40 DEBUG    upload finished in 0.800471s, attributes: file_id=bcb99853ac0d
2025-05-26 13:07:05,232 SpawnPoolWorker-40 DEBUG    upload finished in 0.80102s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:05,397 SpawnPoolWorker-34 DEBUG    upload finished in 1.531832s, attributes: file_id=3cee685d4c29
upload:  76%|███████▌  | 805/1056 [01:30<00:30,  8.24it/s]2025-05-26 13:07:05,398 SpawnPoolWorker-34 DEBUG    upload finished in 1.532539s, attributes: file_id=3cee685d4c29
2025-05-26 13:07:05,416 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b3837b61cefb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:05,481 SpawnPoolWorker-36 DEBUG    upload finished in 0.786172s, attributes: file_id=ed83f2d51e5d
2025-05-26 13:07:05,481 SpawnPoolWorker-36 DEBUG    upload finished in 0.786673s, attributes: file_id=ed83f2d51e5d
2025-05-26 13:07:05,483 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:05,638 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1f81874fff59.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:06,046 SpawnPoolWorker-39 DEBUG    upload finished in 1.195189s, attributes: file_id=15bef4419202
2025-05-26 13:07:06,046 SpawnPoolWorker-39 DEBUG    upload finished in 1.196043s, attributes: file_id=15bef4419202
upload:  77%|███████▋  | 808/1056 [01:31<00:44,  5.56it/s]2025-05-26 13:07:06,050 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/610b966ccf9a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:06,205 SpawnPoolWorker-41 DEBUG    upload finished in 1.272855s, attributes: file_id=5c3d158dd18f
2025-05-26 13:07:06,210 SpawnPoolWorker-35 DEBUG    upload finished in 1.176198s, attributes: file_id=6c787abc4291
2025-05-26 13:07:06

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:06,596 SpawnPoolWorker-37 DEBUG    upload finished in 1.450789s, attributes: file_id=08e3e82cebbe
2025-05-26 13:07:06,596 SpawnPoolWorker-37 DEBUG    upload finished in 1.451389s, attributes: file_id=08e3e82cebbe
upload:  77%|███████▋  | 811/1056 [01:31<00:44,  5.47it/s]2025-05-26 13:07:06,598 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5c2b17e060ff.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:06,820 SpawnPoolWorker-40 DEBUG    upload finished in 1.58668s, attributes: file_id=bc6ee0327487
2025-05-26 13:07:06,821 SpawnPoolWorker-40 DEBUG    upload finished in 1.587272s, attributes: file_id=bc6ee0327487
upload:  77%|███████▋  | 812/1056 [01:31<00:46,  5.21it/s]2025-05-26 13:07:06,823 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f7716274595e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:06,935 SpawnPoolWorker-36 DEBUG    upload finished in 1.452835s, attributes: file_id=2b441ab258cb
2025-05-26 13:07:06,939 SpawnPoolWorker-36 DEBUG    upload finished in 1.456447s, attributes: file_id=2b441ab258cb
upload:  77%|███████▋  | 813/1056 [01:31<00:42,  5.74it/s]2025-05-26 13:0

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:07,097 SpawnPoolWorker-35 DEBUG    upload finished in 0.88292s, attributes: file_id=9fbe44a56a82
2025-05-26 13:07:07,098 SpawnPoolWorker-35 DEBUG    upload finished in 0.883669s, attributes: file_id=9fbe44a56a82
upload:  77%|███████▋  | 814/1056 [01:32<00:41,  5.87it/s]2025-05-26 13:07:07,099 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/732f4fb1ec47.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:07,238 SpawnPoolWorker-38 DEBUG    upload finished in 1.601537s, attributes: file_id=1f81874fff59
2025-05-26 13:07:07,239 SpawnPoolWorker-38 DEBUG    upload finished in 1.602352s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:07,354 SpawnPoolWorker-34 DEBUG    upload finished in 1.940579s, attributes: file_id=b3837b61cefb
2025-05-26 13:07:07,355 SpawnPoolWorker-34 DEBUG    upload finished in 1.941418s, attributes: file_id=b3837b61cefb
upload:  77%|███████▋  | 816/1056 [01:32<00:35,  6.68it/s]2025-05-26 13:07:07,356 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/499664900a88.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:07,550 SpawnPoolWorker-41 DEBUG    upload finished in 1.336372s, attributes: file_id=a8810caeab74
2025-05-26 13:07:07,551 SpawnPoolWorker-41 DEBUG    upload finished in 1.337045s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:07,698 SpawnPoolWorker-37 DEBUG    upload finished in 1.10077s, attributes: file_id=5c2b17e060ff
2025-05-26 13:07:07,698 SpawnPoolWorker-37 DEBUG    upload finished in 1.101339s, attributes: file_id=5c2b17e060ff
upload:  77%|███████▋  | 818/1056 [01:32<00:37,  6.31it/s]2025-05-26 13:07:07,700 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8d038b76dd2d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:07,853 SpawnPoolWorker-39 DEBUG    upload finished in 1.803914s, attributes: file_id=610b966ccf9a
2025-05-26 13:07:07,853 SpawnPoolWorker-39 DEBUG    upload finished in 1.804573s, attributes: file_id=610b966ccf9a
upload:  78%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:08,074 SpawnPoolWorker-38 DEBUG    upload finished in 0.833789s, attributes: file_id=9c273ea8268a
2025-05-26 13:07:08,075 SpawnPoolWorker-38 DEBUG    upload finished in 0.834662s, attributes: file_id=9c273ea8268a
upload:  78%|███████▊  | 820/1056 [01:33<00:41,  5.67it/s]2025-05-26 13:07:08,077 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4c0b74369bd1.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:08,230 SpawnPoolWorker-35 DEBUG    upload finished in 1.131192s, attributes: file_id=732f4fb1ec47
2025-05-26 13:07:08,231 SpawnPoolWorker-35 DEBUG    upload finished in 1.131987s, attributes: file_id=732f4fb1ec47
upload:  78%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:08,394 SpawnPoolWorker-36 DEBUG    upload finished in 1.452927s, attributes: file_id=ee2336146769
2025-05-26 13:07:08,394 SpawnPoolWorker-36 DEBUG    upload finished in 1.453583s, attributes: file_id=ee2336146769
upload:  78%|███████▊  | 822/1056 [01:33<00:39,  5.94it/s]2025-05-26 13:07:08,398 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8a78498fca58.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:08,491 SpawnPoolWorker-34 DEBUG    upload finished in 1.134934s, attributes: file_id=499664900a88
2025-05-26 13:07:08,491 SpawnPoolWorker-34 DEBUG    upload finished in 1.135442s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:08,618 SpawnPoolWorker-41 DEBUG    upload finished in 1.066085s, attributes: file_id=81f5e27cbdb0
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:08,619 SpawnPoolWorker-41 DEBUG    upload finished in 1.066819s, attributes: file_id=81f5e27cbdb0
upload:  78%|███████▊  | 824/1056 [01:33<00:33,  7.02it/s]2025-05-26 13:07:08,622 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f8d72333e5ab.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:08,720 SpawnPoolWorker-40 DEBUG    upload finished in 1.898025s, attributes: file_id=f7716274595e
2025-05-26 13:07:08,721 SpawnPoolWorker-40 DEBUG    upload finished in 1.898624s, attributes: file_id=f7716274595e
upload:  78%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:08,875 SpawnPoolWorker-39 DEBUG    upload finished in 1.02033s, attributes: file_id=442218c526ae
2025-05-26 13:07:08,875 SpawnPoolWorker-39 DEBUG    upload finished in 1.020805s, attributes: file_id=442218c526ae
upload:  78%|███████▊  | 826/1056 [01:33<00:31,  7.24it/s]2025-05-26 13:07:08,876 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9dcd7e2e40a4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:09,028 SpawnPoolWorker-37 DEBUG    upload finished in 1.328121s, attributes: file_id=8d038b76dd2d
2025-05-26 13:07:09,028 SpawnPoolWorker-37 DEBUG    upload finished in 1.328647s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:09,103 SpawnPoolWorker-35 DEBUG    upload finished in 0.870295s, attributes: file_id=b779c1d4aa67
2025-05-26 13:07:09,103 SpawnPoolWorker-35 DEBUG    upload finished in 0.870891s, attributes: file_id=b779c1d4aa67
2025-05-26 13:07:09,105 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/07d64ee232e8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:09,224 SpawnPoolWorker-34 DEBUG    upload finished in 0.731414s, attributes: file_id=6983bc428288
2025-05-26 13:07:09,224 SpawnPoolWorker-34 DEBUG    upload finished in 0.731945s, attributes: file_id=6983bc428288
upload:  79%|███████▊  |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:09,411 SpawnPoolWorker-36 DEBUG    upload finished in 1.015494s, attributes: file_id=8a78498fca58
2025-05-26 13:07:09,411 SpawnPoolWorker-36 DEBUG    upload finished in 1.016058s, attributes: file_id=8a78498fca58
upload:  79%|███████▊  | 830/1056 [01:34<00:31,  7.24it/s]2025-05-26 13:07:09,414 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8045ccf45870.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:09,663 SpawnPoolWorker-38 DEBUG    upload finished in 1.586141s, attributes: file_id=4c0b74369bd1
2025-05-26 13:07:09,663 SpawnPoolWorker-38 DEBUG    upload finished in 1.586874s, attributes: file_id=4c0b74369bd1
upload:  79%|███████▊  | 831/1056 [01:34<00:37,  5.98it/s]2025-05-26 13:07:09,666 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0fba78782460.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:09,853 SpawnPoolWorker-39 DEBUG    upload finished in 0.976999s, attributes: file_id=9dcd7e2e40a4
2025-05-26 13:07:09,853 SpawnPoolWorker-39 DEBUG    upload finished in 0.977429s, attributes: file_id=9dcd7e2e40a4
upload:  79%|███████▉  | 832/1056 [01:34<00:38,  5.78it/s]2025-05-26 13:

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:10,105 SpawnPoolWorker-35 DEBUG    upload finished in 1.000849s, attributes: file_id=07d64ee232e8
2025-05-26 13:07:10,105 SpawnPoolWorker-35 DEBUG    upload finished in 1.001291s, attributes: file_id=07d64ee232e8
upload:  79%|███████▉  | 833/1056 [01:35<00:43,  5.13it/s]2025-05-26 13:07:10,107 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1e5b1d66889a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:10,270 SpawnPoolWorker-40 DEBUG    upload finished in 1.547523s, attributes: file_id=541de4f8b911
2025-05-26 13:07:10,270 SpawnPoolWorker-40 DEBUG    upload finished in 1.548442s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:10,391 SpawnPoolWorker-37 DEBUG    upload finished in 1.36138s, attributes: file_id=9ef8039c0e72
2025-05-26 13:07:10,391 SpawnPoolWorker-37 DEBUG    upload finished in 1.361893s, attributes: file_id=9ef8039c0e72
upload:  79%|███████▉  | 835/1056 [01:35<00:37,  5.97it/s]2025-05-26 13:07:10,394 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ff15cc6e9193.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:10,505 SpawnPoolWorker-34 DEBUG    upload finished in 1.279429s, attributes: file_id=de24a8a5b4c1
2025-05-26 13:07:10,505 SpawnPoolWorker-34 DEBUG    upload finished in 1.27997s, attributes: file_id=de24a8a5b4c1
upload:  79%|███████▉

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:10,687 SpawnPoolWorker-38 DEBUG    upload finished in 1.02116s, attributes: file_id=0fba78782460
2025-05-26 13:07:10,687 SpawnPoolWorker-38 DEBUG    upload finished in 1.021694s, attributes: file_id=0fba78782460
upload:  79%|███████▉  | 837/1056 [01:35<00:35,  6.22it/s]2025-05-26 13:07:10,688 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e8443cc2efbf.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:10,792 SpawnPoolWorker-39 DEBUG    upload finished in 0.93778s, attributes: file_id=d62e7bb843b4
2025-05-26 13:07:10,792 SpawnPoolWorker-39 DEBUG    upload finished in 0.938296s, attributes: file_id=d62e7bb843b4
upload:  79%|███████▉

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:10,939 SpawnPoolWorker-41 DEBUG    upload finished in 2.318599s, attributes: file_id=f8d72333e5ab
2025-05-26 13:07:10,940 SpawnPoolWorker-41 DEBUG    upload finished in 2.319213s, attributes: file_id=f8d72333e5ab
upload:  79%|███████▉  | 839/1056 [01:35<00:31,  6.89it/s]2025-05-26 13:07:10,942 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/99085b75b862.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:11,059 SpawnPoolWorker-35 DEBUG    upload finished in 0.952359s, attributes: file_id=1e5b1d66889a
2025-05-26 13:07:11,059 SpawnPoolWorker-35 DEBUG    upload finished in 0.952917s, attributes: file_id=1e5b1d66889a
upload:  80%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:11,397 SpawnPoolWorker-37 DEBUG    upload finished in 1.004148s, attributes: file_id=ff15cc6e9193
2025-05-26 13:07:11,397 SpawnPoolWorker-37 DEBUG    upload finished in 1.004746s, attributes: file_id=ff15cc6e9193
upload:  80%|███████▉  | 841/1056 [01:36<00:42,  5.07it/s]2025-05-26 13:07:11,399 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b0ad568ef9f7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:11,697 SpawnPoolWorker-40 DEBUG    upload finished in 1.423771s, attributes: file_id=012d89579d81
2025-05-26 13:07:11,697 SpawnPoolWorker-40 DEBUG    upload finished in 1.424677s, attributes: file_id=012d89579d81
upload:  80%|███████▉  | 842/1056 [01:36<00:48,  4.38it/s]2025-05-26 13:07:11,700 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e3e7210685e7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:11,924 SpawnPoolWorker-36 DEBUG    upload finished in 2.511301s, attributes: file_id=8045ccf45870
2025-05-26 13:07:11,924 SpawnPoolWorker-36 DEBUG    upload finished in 2.511767s, attributes: file_id=8045ccf45870
upload:  80%|███████▉  | 843/1056 [01:36<00:48,  4.39it/s]2025-05-26 13:07:11,926 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/98a9c17583cf.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:12,002 SpawnPoolWorker-34 DEBUG    upload finished in 1.496214s, attributes: file_id=5365aa7b4df8
2025-05-26 13:07:12,003 SpawnPoolWorker-34 DEBUG    upload finished in 1.496647s, attributes: file_id=5365aa7b4df8
2025-05-26 13:07:12,004 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:12,223 SpawnPoolWorker-38 DEBUG    upload finished in 1.5353s, attributes: file_id=e8443cc2efbf
2025-05-26 13:07:12,224 SpawnPoolWorker-38 DEBUG    upload finished in 1.535796s, attributes: file_id=e8443cc2efbf
upload:  80%|████████  | 845/1056 [01:37<00:40,  5.21it/s]2025-05-26 13:07:12,226 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d85d12b0452b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:12,408 SpawnPoolWorker-41 DEBUG    upload finished in 1.467078s, attributes: file_id=99085b75b862
2025-05-26 13:07:12,408 SpawnPoolWorker-41 DEBUG    upload finished in 1.467704s, attributes: file_id=99085b75b862
upload:  80%|████████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:12,564 SpawnPoolWorker-37 DEBUG    upload finished in 1.165755s, attributes: file_id=b0ad568ef9f7
2025-05-26 13:07:12,565 SpawnPoolWorker-37 DEBUG    upload finished in 1.166371s, attributes: file_id=b0ad568ef9f7
upload:  80%|████████  | 847/1056 [01:37<00:37,  5.52it/s]2025-05-26 13:07:12,567 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9ff777d6147e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:12,719 SpawnPoolWorker-39 DEBUG    upload finished in 1.925605s, attributes: file_id=0b0125c395bf
2025-05-26 13:07:12,720 SpawnPoolWorker-39 DEBUG    upload finished in 1.926225s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:12,957 SpawnPoolWorker-35 DEBUG    upload finished in 1.895731s, attributes: file_id=20c60d149b66
2025-05-26 13:07:12,957 SpawnPoolWorker-35 DEBUG    upload finished in 1.896624s, attributes: file_id=20c60d149b66
upload:  80%|████████  | 849/1056 [01:37<00:39,  5.21it/s]2025-05-26 13:07:12,962 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a2f0fce650e9.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:13,125 SpawnPoolWorker-40 DEBUG    upload finished in 1.426075s, attributes: file_id=e3e7210685e7
2025-05-26 13:07:13,126 SpawnPoolWorker-40 DEBUG    upload finished in 1.426839s, attributes: file_id=e3e7210685e7
upload:  80%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:13,350 SpawnPoolWorker-41 DEBUG    upload finished in 0.93914s, attributes: file_id=8d3bedb85186
2025-05-26 13:07:13,350 SpawnPoolWorker-41 DEBUG    upload finished in 0.940006s, attributes: file_id=8d3bedb85186
upload:  81%|████████  | 851/1056 [01:38<00:40,  5.09it/s]2025-05-26 13:07:13,353 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bb8752ad8138.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:13,507 SpawnPoolWorker-37 DEBUG    upload finished in 0.941277s, attributes: file_id=9ff777d6147e
2025-05-26 13:07:13,508 SpawnPoolWorker-37 DEBUG    upload finished in 0.941769s, attributes: file_id=9ff777d6147e
upload:  81%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:13,628 SpawnPoolWorker-36 DEBUG    upload finished in 1.702955s, attributes: file_id=98a9c17583cf
2025-05-26 13:07:13,629 SpawnPoolWorker-36 DEBUG    upload finished in 1.703679s, attributes: file_id=98a9c17583cf
upload:  81%|████████  | 853/1056 [01:38<00:33,  6.02it/s]2025-05-26 13:07:13,633 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/554e1d7d9220.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:13,846 SpawnPoolWorker-34 DEBUG    upload finished in 1.842819s, attributes: file_id=35bf34380c71
2025-05-26 13:07:13,847 SpawnPoolWorker-34 DEBUG    upload finished in 1.843502s, attributes: file_id=35bf34380c71
upload:  81%|████████  | 854/1056 [01:38<00:36,  5.51it/s]2025-05-26 13:07:13,849 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/700cbef98934.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:14,061 SpawnPoolWorker-38 DEBUG    upload finished in 1.836617s, attributes: file_id=d85d12b0452b
2025-05-26 13:07:14,062 SpawnPoolWorker-38 DEBUG    upload finished in 1.837167s, attributes: file_id=d85d12b0452b
upload:  81%|████████  | 855/1056 [01:39<00:38,  5.22it/s]2025-05-26 13:07:14,064 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/55192b59e5d0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:14,294 SpawnPoolWorker-40 DEBUG    upload finished in 1.166213s, attributes: file_id=9ad51f537c01
2025-05-26 13:07:14,295 SpawnPoolWorker-40 DEBUG    upload finished in 1.167123s, attributes: file_id=9ad51f537c01
upload:  81%|████████  | 856/1056 [01:39<00:40,  4.90it/s]2025-05-26 13:07:14,298 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/96bafd594da0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:14,531 SpawnPoolWorker-36 DEBUG    upload finished in 0.898957s, attributes: file_id=554e1d7d9220
2025-05-26 13:07:14,532 SpawnPoolWorker-36 DEBUG    upload finished in 0.899897s, attributes: file_id=554e1d7d9220
upload:  81%|████████  | 857/1056 [01:39<00:42,  4.68it/s]2025-05-26 13:07:14,535 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2a3d94e6c469.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:14,656 SpawnPoolWorker-37 DEBUG    upload finished in 1.147097s, attributes: file_id=9c3abec38969
2025-05-26 13:07:14,656 SpawnPoolWorker-37 DEBUG    upload finished in 1.147682s, attributes: file_id=9c3abec38969
upload:  81%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:14,744 SpawnPoolWorker-35 DEBUG    upload finished in 1.784284s, attributes: file_id=a2f0fce650e9
2025-05-26 13:07:14,745 SpawnPoolWorker-35 DEBUG    upload finished in 1.78517s, attributes: file_id=a2f0fce650e9
2025-05-26 13:07:14,747 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a265db1d1202.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:14,817 SpawnPoolWorker-34 DEBUG    upload finished in 0.968753s, attributes: file_id=700cbef98934
2025-05-26 13:07:14,818 SpawnPoolWorker-34 DEBUG    upload finished in 0.969302s, attributes: file_id=700cbef98934
upload:  81%|████████▏ | 860/1056 [01:39<00:27,  7.24it/s]2025-05-26 13:07:14,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:15,006 SpawnPoolWorker-38 DEBUG    upload finished in 0.942166s, attributes: file_id=55192b59e5d0
2025-05-26 13:07:15,006 SpawnPoolWorker-38 DEBUG    upload finished in 0.942706s, attributes: file_id=55192b59e5d0
upload:  82%|████████▏ | 861/1056 [01:39<00:29,  6.64it/s]2025-05-26 13:07:15,008 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ed3aa19003e8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:15,098 SpawnPoolWorker-39 DEBUG    upload finished in 2.376188s, attributes: file_id=10e70a0da8a4
2025-05-26 13:07:15,098 SpawnPoolWorker-39 DEBUG    upload finished in 2.376808s, attributes: file_id=10e70a0da8a4
2025-05-26 13:07:15

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:15,221 SpawnPoolWorker-41 DEBUG    upload finished in 1.868424s, attributes: file_id=bb8752ad8138
2025-05-26 13:07:15,221 SpawnPoolWorker-41 DEBUG    upload finished in 1.869078s, attributes: file_id=bb8752ad8138
upload:  82%|████████▏ | 863/1056 [01:40<00:25,  7.53it/s]Removed trailing semicolon and whitespace from query
2025-05-26 13:07:15,223 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e355cdf06261.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:15,326 SpawnPoolWorker-40 DEBUG    upload finished in 1.028947s, attributes: file_id=96bafd594da0
2025-05-26 13:07:15,326 SpawnPoolWorker-40 DEBUG    upload finished in 1.02956s, attributes: file_id=96bafd594da0
upload:  82%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:15,540 SpawnPoolWorker-35 DEBUG    upload finished in 0.793878s, attributes: file_id=a265db1d1202
2025-05-26 13:07:15,541 SpawnPoolWorker-35 DEBUG    upload finished in 0.794429s, attributes: file_id=a265db1d1202
upload:  82%|████████▏ | 866/1056 [01:40<00:22,  8.42it/s]2025-05-26 13:07:15,542 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c94b66f933e2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:15,701 SpawnPoolWorker-37 DEBUG    upload finished in 1.043594s, attributes: file_id=c032697c40c1
2025-05-26 13:07:15,702 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:15,927 SpawnPoolWorker-38 DEBUG    upload finished in 0.919674s, attributes: file_id=ed3aa19003e8
2025-05-26 13:07:15,927 SpawnPoolWorker-38 DEBUG    upload finished in 0.920079s, attributes: file_id=ed3aa19003e8
upload:  82%|████████▏ | 868/1056 [01:40<00:28,  6.61it/s]2025-05-26 13:07:15,929 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/810dc473a59c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:16,116 SpawnPoolWorker-41 DEBUG    upload finished in 0.893028s, attributes: file_id=e355cdf06261
2025-05-26 13:07:16,116 SpawnPoolWorker-41 DEBUG    upload finished in 0.893587s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:16,300 SpawnPoolWorker-34 DEBUG    upload finished in 1.480878s, attributes: file_id=3e3caf8bc4f3
2025-05-26 13:07:16,300 SpawnPoolWorker-34 DEBUG    upload finished in 1.481469s, attributes: file_id=3e3caf8bc4f3
upload:  82%|████████▏ | 870/1056 [01:41<00:31,  5.98it/s]2025-05-26 13:07:16,302 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b2eeb817f77a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:16,413 SpawnPoolWorker-40 DEBUG    upload finished in 1.085013s, attributes: file_id=7ef8c3828c18
2025-05-26 13:07:16,413 SpawnPoolWorker-40 DEBUG    upload finished in 1.085615s, attributes: file_id=7ef8c3828c18
upload:  82%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:16,555 SpawnPoolWorker-39 DEBUG    upload finished in 1.456287s, attributes: file_id=b492b0faf789
2025-05-26 13:07:16,556 SpawnPoolWorker-39 DEBUG    upload finished in 1.456728s, attributes: file_id=b492b0faf789
upload:  83%|████████▎ | 872/1056 [01:41<00:27,  6.69it/s]2025-05-26 13:07:16,558 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/21c478a4df0d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:16,715 SpawnPoolWorker-38 DEBUG    upload finished in 0.786296s, attributes: file_id=810dc473a59c
2025-05-26 13:07:16,715 SpawnPoolWorker-38 DEBUG    upload finished in 0.786835s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:16,862 SpawnPoolWorker-35 DEBUG    upload finished in 1.320435s, attributes: file_id=c94b66f933e2
2025-05-26 13:07:16,862 SpawnPoolWorker-35 DEBUG    upload finished in 1.320906s, attributes: file_id=c94b66f933e2
upload:  83%|████████▎ | 874/1056 [01:41<00:27,  6.63it/s]2025-05-26 13:07:16,865 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7093271b4c77.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:16,994 SpawnPoolWorker-37 DEBUG    upload finished in 1.290737s, attributes: file_id=84e0af34bc36
2025-05-26 13:07:16,995 SpawnPoolWorker-37 DEBUG    upload finished in 1.291253s, attributes: file_id=84e0af34bc36
upload:  83%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:17,090 SpawnPoolWorker-34 DEBUG    upload finished in 0.788278s, attributes: file_id=b2eeb817f77a
2025-05-26 13:07:17,090 SpawnPoolWorker-34 DEBUG    upload finished in 0.788865s, attributes: file_id=b2eeb817f77a
2025-05-26 13:07:17,092 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/67e401a633ad.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:17,170 SpawnPoolWorker-40 DEBUG    upload finished in 0.755723s, attributes: file_id=cb742efc13aa
2025-05-26 13:07:17,171 SpawnPoolWorker-40 DEBUG    upload finished in 0.75623s, attributes: file_id=cb742efc13aa
upload:  83%|████████▎ | 877/1056 [01:42<00:21,  8.39it/s]2025-05-26 13:07:17,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:17,296 SpawnPoolWorker-36 DEBUG    upload finished in 1.893433s, attributes: file_id=1c3ba7835fcc
2025-05-26 13:07:17,296 SpawnPoolWorker-36 DEBUG    upload finished in 1.893877s, attributes: file_id=1c3ba7835fcc
upload:  83%|████████▎ | 878/1056 [01:42<00:21,  8.28it/s]2025-05-26 13:07:17,298 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5e5d26cad0f3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:17,437 SpawnPoolWorker-39 DEBUG    upload finished in 0.880471s, attributes: file_id=21c478a4df0d
2025-05-26 13:07:17,438 SpawnPoolWorker-39 DEBUG    upload finished in 0.880991s, attributes: file_id=21c478a4df0d
upload:  83%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:17,571 SpawnPoolWorker-41 DEBUG    upload finished in 1.454107s, attributes: file_id=b6e79680c9f0
2025-05-26 13:07:17,572 SpawnPoolWorker-41 DEBUG    upload finished in 1.454596s, attributes: file_id=b6e79680c9f0
upload:  83%|████████▎ | 880/1056 [01:42<00:22,  7.80it/s]2025-05-26 13:07:17,575 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/72b1bac2b13b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:17,762 SpawnPoolWorker-38 DEBUG    upload finished in 1.045621s, attributes: file_id=1ffaa33353d9
2025-05-26 13:07:17,763 SpawnPoolWorker-38 DEBUG    upload finished in 1.046146s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:17,828 SpawnPoolWorker-35 DEBUG    upload finished in 0.964645s, attributes: file_id=7093271b4c77
2025-05-26 13:07:17,829 SpawnPoolWorker-35 DEBUG    upload finished in 0.965151s, attributes: file_id=7093271b4c77
2025-05-26 13:07:17,831 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b603ac2e1954.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:17,995 SpawnPoolWorker-34 DEBUG    upload finished in 0.903464s, attributes: file_id=67e401a633ad
2025-05-26 13:07:17,995 SpawnPoolWorker-34 DEBUG    upload finished in 0.903891s, attributes: file_id=67e401a633ad
upload:  84%|████████▎ |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:18,067 SpawnPoolWorker-40 DEBUG    upload finished in 0.894788s, attributes: file_id=31ed8eb15b0d
2025-05-26 13:07:18,067 SpawnPoolWorker-40 DEBUG    upload finished in 0.895231s, attributes: file_id=31ed8eb15b0d
2025-05-26 13:07:18,069 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/015fe66c660b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:18,196 SpawnPoolWorker-37 DEBUG    upload finished in 1.20064s, attributes: file_id=5eddf26368ed
2025-05-26 13:07:18,197 SpawnPoolWorker-37 DEBUG    upload finished in 1.201133s, attributes: file_id=5eddf26368ed
upload:  84%|████████▍ | 885/1056 [01:43<00:20,  8.31it/s]Removed trailing sem

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:18,320 SpawnPoolWorker-39 DEBUG    upload finished in 0.880987s, attributes: file_id=58066225cd39
2025-05-26 13:07:18,321 SpawnPoolWorker-39 DEBUG    upload finished in 0.881538s, attributes: file_id=58066225cd39
upload:  84%|████████▍ | 886/1056 [01:43<00:20,  8.26it/s]2025-05-26 13:07:18,322 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3dce009a5766.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:18,465 SpawnPoolWorker-36 DEBUG    upload finished in 1.167658s, attributes: file_id=5e5d26cad0f3
2025-05-26 13:07:18,465 SpawnPoolWorker-36 DEBUG    upload finished in 1.168229s, attributes: file_id=5e5d26cad0f3
upload:  84%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:18,595 SpawnPoolWorker-35 DEBUG    upload finished in 0.765006s, attributes: file_id=b603ac2e1954
2025-05-26 13:07:18,595 SpawnPoolWorker-35 DEBUG    upload finished in 0.765511s, attributes: file_id=b603ac2e1954
upload:  84%|████████▍ | 888/1056 [01:43<00:21,  7.85it/s]2025-05-26 13:07:18,597 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/10217514a585.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:18,692 SpawnPoolWorker-38 DEBUG    upload finished in 0.927823s, attributes: file_id=4bb1237aad47
2025-05-26 13:07:18,692 SpawnPoolWorker-38 DEBUG    upload finished in 0.928364s, attributes: file_id=4bb1237aad47
2025-05-26 13:07:18

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:18,805 SpawnPoolWorker-40 DEBUG    upload finished in 0.737096s, attributes: file_id=015fe66c660b
2025-05-26 13:07:18,805 SpawnPoolWorker-40 DEBUG    upload finished in 0.737477s, attributes: file_id=015fe66c660b
upload:  84%|████████▍ | 890/1056 [01:43<00:19,  8.47it/s]2025-05-26 13:07:18,807 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7dcc796fee8e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:18,961 SpawnPoolWorker-41 DEBUG    upload finished in 1.387573s, attributes: file_id=72b1bac2b13b
2025-05-26 13:07:18,961 SpawnPoolWorker-41 DEBUG    upload finished in 1.388196s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:19,095 SpawnPoolWorker-34 DEBUG    upload finished in 1.098611s, attributes: file_id=12c2233a0d93
2025-05-26 13:07:19,095 SpawnPoolWorker-34 DEBUG    upload finished in 1.099096s, attributes: file_id=12c2233a0d93
upload:  84%|████████▍ | 892/1056 [01:44<00:21,  7.78it/s]Removed trailing semicolon and whitespace from query
2025-05-26 13:07:19,098 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1386a83f83f1.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:19,224 SpawnPoolWorker-37 DEBUG    upload finished in 1.026161s, attributes: file_id=4c7f03d092d4
2025-05-26 13:07:19,225 SpawnPoolWorker-37 DEBUG    upload finished in 1.026859s, attributes: file_id=4c7f03d092d4
upload:  85%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:19,353 SpawnPoolWorker-36 DEBUG    upload finished in 0.886179s, attributes: file_id=53e5b114eb7a
2025-05-26 13:07:19,354 SpawnPoolWorker-36 DEBUG    upload finished in 0.886732s, attributes: file_id=53e5b114eb7a
upload:  85%|████████▍ | 894/1056 [01:44<00:20,  7.76it/s]2025-05-26 13:07:19,356 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/15ff90b50959.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:19,474 SpawnPoolWorker-39 DEBUG    upload finished in 1.151859s, attributes: file_id=3dce009a5766
2025-05-26 13:07:19,474 SpawnPoolWorker-39 DEBUG    upload finished in 1.152408s, attributes: file_id=3dce009a5766
upload:  85%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:19,593 SpawnPoolWorker-35 DEBUG    upload finished in 0.996935s, attributes: file_id=10217514a585
2025-05-26 13:07:19,594 SpawnPoolWorker-35 DEBUG    upload finished in 0.997496s, attributes: file_id=10217514a585
upload:  85%|████████▍ | 896/1056 [01:44<00:19,  8.03it/s]2025-05-26 13:07:19,596 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1f1f8bc1ff62.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:19,692 SpawnPoolWorker-41 DEBUG    upload finished in 0.730158s, attributes: file_id=e033dab7ebf3
2025-05-26 13:07:19,693 SpawnPoolWorker-41 DEBUG    upload finished in 0.73073s, attributes: file_id=e033dab7ebf3
2025-05-26 13:07:19,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:19,964 SpawnPoolWorker-34 DEBUG    upload finished in 0.867578s, attributes: file_id=1386a83f83f1
2025-05-26 13:07:19,965 SpawnPoolWorker-34 DEBUG    upload finished in 0.868103s, attributes: file_id=1386a83f83f1
upload:  85%|████████▌ | 899/1056 [01:44<00:20,  7.85it/s]2025-05-26 13:07:19,967 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/41514ae129ae.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-vers

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:20,218 SpawnPoolWorker-40 DEBUG    upload finished in 1.412121s, attributes: file_id=7dcc796fee8e
2025-05-26 13:07:20,219 SpawnPoolWorker-40 DEBUG    upload finished in 1.412687s, attributes: file_id=7dcc796fee8e
upload:  85%|████████▌ | 901/1056 [01:45<00:19,  7.79it/s]2025-05-26 13:07:20,220 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9a9b7b42c9b8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:20,407 SpawnPoolWorker-35 DEBUG    upload finished in 0.811814s, attributes: file_id=1f1f8bc1ff62
2025-05-26 13:07:20,407 SpawnPoolWorker-35 DEBUG    upload finished in 0.812635s, attributes: file_id=1f1f8bc1ff62
upload:  85%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:20,597 SpawnPoolWorker-36 DEBUG    upload finished in 1.241946s, attributes: file_id=15ff90b50959
2025-05-26 13:07:20,597 SpawnPoolWorker-36 DEBUG    upload finished in 1.24261s, attributes: file_id=15ff90b50959
upload:  86%|████████▌ | 903/1056 [01:45<00:24,  6.34it/s]2025-05-26 13:07:20,600 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/403eeb228e8b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:20,815 SpawnPoolWorker-34 DEBUG    upload finished in 0.849404s, attributes: file_id=41514ae129ae
2025-05-26 13:07:20,815 SpawnPoolWorker-34 DEBUG    upload finished in 0.849947s, attributes: file_id=41514ae129ae
upload:  86%|████████▌ | 904/1056 [01:45<00:26,  5.71it/s]2025-05-26 13:07:20,817 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/323976cd72fb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:20,975 SpawnPoolWorker-41 DEBUG    upload finished in 1.281288s, attributes: file_id=849199a336a1
2025-05-26 13:07:20,976 SpawnPoolWorker-41 DEBUG    upload finished in 1.282014s, attributes: file_id=849199a336a1
upload:  86%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:21,129 SpawnPoolWorker-38 DEBUG    upload finished in 1.333451s, attributes: file_id=1f44eee66104
2025-05-26 13:07:21,129 SpawnPoolWorker-38 DEBUG    upload finished in 1.334231s, attributes: file_id=1f44eee66104
upload:  86%|████████▌ | 906/1056 [01:46<00:24,  6.03it/s]2025-05-26 13:07:21,132 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5a8721901b3a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:21,397 SpawnPoolWorker-39 DEBUG    upload finished in 1.921335s, attributes: file_id=9a547fbc2160
2025-05-26 13:07:21,397 SpawnPoolWorker-39 DEBUG    upload finished in 1.921904s, attributes: file_id=9a547fbc2160
upload:  86%|████████▌ | 907/1056 [01:46<00:29,  5.10it/s]2025-05-26 13:07:21,400 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ae9fb7b7832d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:21,574 SpawnPoolWorker-37 DEBUG    upload finished in 1.494076s, attributes: file_id=530e84d15e4a
2025-05-26 13:07:21,574 SpawnPoolWorker-37 DEBUG    upload finished in 1.494869s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:21,870 SpawnPoolWorker-34 DEBUG    upload finished in 1.053484s, attributes: file_id=323976cd72fb
2025-05-26 13:07:21,871 SpawnPoolWorker-34 DEBUG    upload finished in 1.05439s, attributes: file_id=323976cd72fb
upload:  86%|████████▌ | 909/1056 [01:46<00:32,  4.50it/s]2025-05-26 13:07:21,874 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1dfc4744b11f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:22,025 SpawnPoolWorker-41 DEBUG    upload finished in 1.047207s, attributes: file_id=c1bfe9457276
2025-05-26 13:07:22,026 SpawnPoolWorker-41 DEBUG    upload finished in 1.048046s, attributes: file_id=c1bfe9457276
upload:  86%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:22,129 SpawnPoolWorker-40 DEBUG    upload finished in 1.909583s, attributes: file_id=9a9b7b42c9b8
2025-05-26 13:07:22,130 SpawnPoolWorker-40 DEBUG    upload finished in 1.910056s, attributes: file_id=9a9b7b42c9b8
upload:  86%|████████▋ | 911/1056 [01:47<00:25,  5.79it/s]2025-05-26 13:07:22,132 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d7cb146de1df.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:22,255 SpawnPoolWorker-36 DEBUG    upload finished in 1.656235s, attributes: file_id=403eeb228e8b
2025-05-26 13:07:22,256 SpawnPoolWorker-36 DEBUG    upload finished in 1.656807s, attributes: file_id=403eeb228e8b
upload:  86%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:22,396 SpawnPoolWorker-39 DEBUG    upload finished in 0.996906s, attributes: file_id=ae9fb7b7832d
upload:  86%|████████▋ | 913/1056 [01:47<00:21,  6.51it/s]2025-05-26 13:07:22,397 SpawnPoolWorker-39 DEBUG    upload finished in 0.998433s, attributes: file_id=ae9fb7b7832d
2025-05-26 13:07:22,403 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/96c2ae5507e3.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:22,493 SpawnPoolWorker-37 DEBUG    upload finished in 0.916763s, attributes: file_id=b17b1e9ca6fe
2025-05-26 13:07:22,494 SpawnPoolWorker-37 DEBUG    upload finished in 0.917348s, attributes: file_id=b17b1e9ca6fe
2025-05-26 13:07:22

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:22,616 SpawnPoolWorker-35 DEBUG    upload finished in 2.206206s, attributes: file_id=49daa4ccb643
2025-05-26 13:07:22,616 SpawnPoolWorker-35 DEBUG    upload finished in 2.206857s, attributes: file_id=49daa4ccb643
upload:  87%|████████▋ | 915/1056 [01:47<00:18,  7.51it/s]2025-05-26 13:07:22,618 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d51372b81f93.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:22,791 SpawnPoolWorker-38 DEBUG    upload finished in 1.659713s, attributes: file_id=5a8721901b3a
2025-05-26 13:07:22,791 SpawnPoolWorker-38 DEBUG    upload finished in 1.660222s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:22,946 SpawnPoolWorker-41 DEBUG    upload finished in 0.917905s, attributes: file_id=c9d9fcf05b72
2025-05-26 13:07:22,946 SpawnPoolWorker-41 DEBUG    upload finished in 0.91859s, attributes: file_id=c9d9fcf05b72
upload:  87%|████████▋ | 917/1056 [01:47<00:20,  6.82it/s]2025-05-26 13:07:22,948 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0e97a376a3a1.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:23,074 SpawnPoolWorker-40 DEBUG    upload finished in 0.942546s, attributes: file_id=d7cb146de1df
2025-05-26 13:07:23,074 SpawnPoolWorker-40 DEBUG    upload finished in 0.943097s, attributes: file_id=d7cb146de1df
upload:  87%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:23,192 SpawnPoolWorker-34 DEBUG    upload finished in 1.319261s, attributes: file_id=1dfc4744b11f
2025-05-26 13:07:23,193 SpawnPoolWorker-34 DEBUG    upload finished in 1.319902s, attributes: file_id=1dfc4744b11f
upload:  87%|████████▋ | 919/1056 [01:48<00:18,  7.40it/s]2025-05-26 13:07:23,195 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/66133cffe6b9.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:23,302 SpawnPoolWorker-36 DEBUG    upload finished in 1.045315s, attributes: file_id=935121a3d77b
2025-05-26 13:07:23,303 SpawnPoolWorker-36 DEBUG    upload finished in 1.045832s, attributes: file_id=935121a3d77b
upload:  87%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:23,441 SpawnPoolWorker-39 DEBUG    upload finished in 1.038992s, attributes: file_id=96c2ae5507e3
2025-05-26 13:07:23,441 SpawnPoolWorker-39 DEBUG    upload finished in 1.03962s, attributes: file_id=96c2ae5507e3
upload:  87%|████████▋ | 921/1056 [01:48<00:17,  7.64it/s]2025-05-26 13:07:23,442 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e09f4131fbb0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:23,575 SpawnPoolWorker-38 DEBUG    upload finished in 0.783048s, attributes: file_id=815c7025997d
2025-05-26 13:07:23,575 SpawnPoolWorker-38 DEBUG    upload finished in 0.783569s, attributes: file_id=815c7025997d
upload:  87%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:23,721 SpawnPoolWorker-37 DEBUG    upload finished in 1.226768s, attributes: file_id=c354fce2fc58
2025-05-26 13:07:23,722 SpawnPoolWorker-37 DEBUG    upload finished in 1.227197s, attributes: file_id=c354fce2fc58
upload:  87%|████████▋ | 923/1056 [01:48<00:18,  7.34it/s]2025-05-26 13:07:23,723 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/848da5d5478f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:23,858 SpawnPoolWorker-35 DEBUG    upload finished in 1.241284s, attributes: file_id=d51372b81f93
2025-05-26 13:07:23,859 SpawnPoolWorker-35 DEBUG    upload finished in 1.241907s, attributes: file_id=d51372b81f93
upload:  88%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:23,953 SpawnPoolWorker-41 DEBUG    upload finished in 1.004949s, attributes: file_id=0e97a376a3a1
2025-05-26 13:07:23,954 SpawnPoolWorker-41 DEBUG    upload finished in 1.006929s, attributes: file_id=0e97a376a3a1
2025-05-26 13:07:23,959 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/aea53f425a64.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:24,121 SpawnPoolWorker-36 DEBUG    upload finished in 0.817067s, attributes: file_id=da5af3ec5a48
2025-05-26 13:07:24,121 SpawnPoolWorker-36 DEBUG    upload finished in 0.81759s, attributes: file_id=da5af3ec5a48
upload:  88%|████████▊ | 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:24,241 SpawnPoolWorker-40 DEBUG    upload finished in 1.165804s, attributes: file_id=76e5232738e3
2025-05-26 13:07:24,242 SpawnPoolWorker-40 DEBUG    upload finished in 1.166473s, attributes: file_id=76e5232738e3
upload:  88%|████████▊ | 927/1056 [01:49<00:16,  7.65it/s]2025-05-26 13:07:24,244 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c4f3c133218d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:24,341 SpawnPoolWorker-38 DEBUG    upload finished in 0.764552s, attributes: file_id=7bf1c8473298
2025-05-26 13:07:24,342 SpawnPoolWorker-38 DEBUG    upload finished in 0.765066s, attributes: file_id=7bf1c8473298
upload:  88%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:24,498 SpawnPoolWorker-39 DEBUG    upload finished in 1.055785s, attributes: file_id=e09f4131fbb0
2025-05-26 13:07:24,498 SpawnPoolWorker-39 DEBUG    upload finished in 1.056206s, attributes: file_id=e09f4131fbb0
upload:  88%|████████▊ | 929/1056 [01:49<00:16,  7.59it/s]2025-05-26 13:07:24,500 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9ffe4f533279.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:24,575 SpawnPoolWorker-37 DEBUG    upload finished in 0.851866s, attributes: file_id=848da5d5478f
2025-05-26 13:07:24,575 SpawnPoolWorker-37 DEBUG    upload finished in 0.852412s, attributes: file_id=848da5d5478f
2025-05-26 13:07:24

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:25,004 SpawnPoolWorker-34 DEBUG    upload finished in 1.810274s, attributes: file_id=66133cffe6b9
2025-05-26 13:07:25,005 SpawnPoolWorker-34 DEBUG    upload finished in 1.81107s, attributes: file_id=66133cffe6b9
upload:  88%|████████▊ | 932/1056 [01:49<00:20,  5.97it/s]2025-05-26 13:07:25,009 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/02ae46f34c5b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:25,292 SpawnPoolWorker-41 DEBUG    upload finished in 1.33401s, attributes: file_id=aea53f425a64
2025-05-26 13:07:25,292 SpawnPoolWorker-41 DEBUG    upload finished in 1.334796s, attributes: file_id=aea53f425a64
upload:  88%|████████▊ | 933/1056 [01:50<00:24,  5.06it/s]2025-05-26 13:07:25,294 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f1800f177ad8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:25,476 SpawnPoolWorker-40 DEBUG    upload finished in 1.233226s, attributes: file_id=c4f3c133218d
2025-05-26 13:07:25,477 SpawnPoolWorker-40 DEBUG    upload finished in 1.23397s, a

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:25,592 SpawnPoolWorker-39 DEBUG    upload finished in 1.092472s, attributes: file_id=9ffe4f533279
2025-05-26 13:07:25,592 SpawnPoolWorker-39 DEBUG    upload finished in 1.092912s, attributes: file_id=9ffe4f533279
upload:  89%|████████▊ | 935/1056 [01:50<00:20,  5.80it/s]2025-05-26 13:07:25,594 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1f6f9db87828.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:25,718 SpawnPoolWorker-36 DEBUG    upload finished in 1.595346s, attributes: file_id=8c06295f836c
2025-05-26 13:07:25,718 SpawnPoolWorker-36 DEBUG    upload finished in 1.595823s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:25,822 SpawnPoolWorker-34 DEBUG    upload finished in 0.815501s, attributes: file_id=02ae46f34c5b
2025-05-26 13:07:25,823 SpawnPoolWorker-34 DEBUG    upload finished in 0.816135s, attributes: file_id=02ae46f34c5b
upload:  89%|████████▊ | 937/1056 [01:50<00:17,  6.96it/s]2025-05-26 13:07:25,825 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3d125e9c43fd.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:25,976 SpawnPoolWorker-38 DEBUG    upload finished in 1.633106s, attributes: file_id=f46cf4c668cc
2025-05-26 13:07:25,976 SpawnPoolWorker-38 DEBUG    upload finished in 1.633591s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:26,123 SpawnPoolWorker-41 DEBUG    upload finished in 0.829431s, attributes: file_id=f1800f177ad8
2025-05-26 13:07:26,123 SpawnPoolWorker-41 DEBUG    upload finished in 0.82997s, attributes: file_id=f1800f177ad8
upload:  89%|████████▉ | 939/1056 [01:51<00:17,  6.82it/s]2025-05-26 13:07:26,125 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bfa980dba4b2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:26,234 SpawnPoolWorker-40 DEBUG    upload finished in 0.755562s, attributes: file_id=cb9c6f375afa
2025-05-26 13:07:26,235 SpawnPoolWorker-40 DEBUG    upload finished in 0.756195s, attributes: file_id=cb9c6f375afa
upload:  89%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:26,345 SpawnPoolWorker-37 DEBUG    upload finished in 1.768843s, attributes: file_id=d67ea5b60746
2025-05-26 13:07:26,345 SpawnPoolWorker-37 DEBUG    upload finished in 1.769342s, attributes: file_id=d67ea5b60746
upload:  89%|████████▉ | 941/1056 [01:51<00:14,  7.77it/s]2025-05-26 13:07:26,347 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a5abf0bc0aad.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:26,498 SpawnPoolWorker-35 DEBUG    upload finished in 1.862946s, attributes: file_id=f840f5622948
2025-05-26 13:07:26,498 SpawnPoolWorker-35 DEBUG    upload finished in 1.863397s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:26,569 SpawnPoolWorker-34 DEBUG    upload finished in 0.744619s, attributes: file_id=3d125e9c43fd
2025-05-26 13:07:26,569 SpawnPoolWorker-34 DEBUG    upload finished in 0.745171s, attributes: file_id=3d125e9c43fd
2025-05-26 13:07:26,571 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/87340de2ad34.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:26,731 SpawnPoolWorker-39 DEBUG    upload finished in 1.138187s, attributes: file_id=1f6f9db87828
2025-05-26 13:07:26,731 SpawnPoolWorker-39 DEBUG    upload finished in 1.138636s, attributes: file_id=1f6f9db87828
upload:  89%|████████▉ |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:26,867 SpawnPoolWorker-36 DEBUG    upload finished in 1.14778s, attributes: file_id=1f106fb79c25
2025-05-26 13:07:26,868 SpawnPoolWorker-36 DEBUG    upload finished in 1.148433s, attributes: file_id=1f106fb79c25
upload:  89%|████████▉ | 945/1056 [01:51<00:14,  7.73it/s]2025-05-26 13:07:26,870 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/de8af734009e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:27,033 SpawnPoolWorker-40 DEBUG    upload finished in 0.796751s, attributes: file_id=dba34574fe25
2025-05-26 13:07:27,033 SpawnPoolWorker-40 DEBUG    upload finished in 0.797252s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:27,138 SpawnPoolWorker-38 DEBUG    upload finished in 1.160793s, attributes: file_id=e7c14de8e5f5
2025-05-26 13:07:27,139 SpawnPoolWorker-38 DEBUG    upload finished in 1.161372s, attributes: file_id=e7c14de8e5f5
upload:  90%|████████▉ | 947/1056 [01:52<00:14,  7.71it/s]2025-05-26 13:07:27,141 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a9257a8cab40.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:27,269 SpawnPoolWorker-41 DEBUG    upload finished in 1.145188s, attributes: file_id=bfa980dba4b2
2025-05-26 13:07:27,270 SpawnPoolWorker-41 DEBUG    upload finished in 1.145744s, attributes: file_id=bfa980dba4b2
upload:  90%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:27,393 SpawnPoolWorker-34 DEBUG    upload finished in 0.822098s, attributes: file_id=87340de2ad34
2025-05-26 13:07:27,394 SpawnPoolWorker-34 DEBUG    upload finished in 0.823148s, attributes: file_id=87340de2ad34
upload:  90%|████████▉ | 949/1056 [01:52<00:13,  7.78it/s]2025-05-26 13:07:27,402 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/82e2d5777ae0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:27,518 SpawnPoolWorker-39 DEBUG    upload finished in 0.785489s, attributes: file_id=c94d661d5115
2025-05-26 13:07:27,518 SpawnPoolWorker-39 DEBUG    upload finished in 0.786025s, attributes: file_id=c94d661d5115
upload:  90%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:27,619 SpawnPoolWorker-35 DEBUG    upload finished in 1.11977s, attributes: file_id=2c1b3cf41321
2025-05-26 13:07:27,619 SpawnPoolWorker-35 DEBUG    upload finished in 1.120298s, attributes: file_id=2c1b3cf41321
upload:  90%|█████████ | 951/1056 [01:52<00:12,  8.36it/s]2025-05-26 13:07:27,621 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b5f2de71a33d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:27,745 SpawnPoolWorker-37 DEBUG    upload finished in 1.398693s, attributes: file_id=a5abf0bc0aad
2025-05-26 13:07:27,746 SpawnPoolWorker-37 DEBUG    upload finished in 1.399252s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:27,862 SpawnPoolWorker-38 DEBUG    upload finished in 0.721729s, attributes: file_id=a9257a8cab40
2025-05-26 13:07:27,862 SpawnPoolWorker-38 DEBUG    upload finished in 0.72232s, attributes: file_id=a9257a8cab40
upload:  90%|█████████ | 953/1056 [01:52<00:12,  8.33it/s]2025-05-26 13:07:27,865 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ac6dcb38a7cb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:28,016 SpawnPoolWorker-36 DEBUG    upload finished in 1.147251s, attributes: file_id=de8af734009e
2025-05-26 13:07:28,017 SpawnPoolWorker-36 DEBUG    upload finished in 1.14777s, a

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:28,158 SpawnPoolWorker-40 DEBUG    upload finished in 1.123632s, attributes: file_id=cf15afbd0508
2025-05-26 13:07:28,159 SpawnPoolWorker-40 DEBUG    upload finished in 1.12418s, attributes: file_id=cf15afbd0508
upload:  90%|█████████ | 955/1056 [01:53<00:13,  7.47it/s]2025-05-26 13:07:28,161 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3bb2acdd241f.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:28,300 SpawnPoolWorker-34 DEBUG    upload finished in 0.900886s, attributes: file_id=82e2d5777ae0
2025-05-26 13:07:28,301 SpawnPoolWorker-34 DEBUG    upload finished in 0.902282s, attributes: file_id=82e2d5777ae0
upload:  91%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:28,403 SpawnPoolWorker-41 DEBUG    upload finished in 1.13187s, attributes: file_id=dc7f0ce45582
2025-05-26 13:07:28,403 SpawnPoolWorker-41 DEBUG    upload finished in 1.132424s, attributes: file_id=dc7f0ce45582
upload:  91%|█████████ | 957/1056 [01:53<00:12,  7.93it/s]2025-05-26 13:07:28,405 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2d30ea8f0676.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:28,498 SpawnPoolWorker-37 DEBUG    upload finished in 0.75101s, attributes: file_id=2383d4af9a74
2025-05-26 13:07:28,498 SpawnPoolWorker-37 DEBUG    upload finished in 0.751564s, attributes: file_id=2383d4af9a74
2025-05-26 13:07:28,5

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:28,608 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4532d2fa38f1.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:28,769 SpawnPoolWorker-36 DEBUG    upload finished in 0.751705s, attributes: file_id=48c0b3e2ce20
2025-05-26 13:07:28,770 SpawnPoolWorker-36 DEBUG    upload finished in 0.752273s, attributes: file_id=48c0b3e2ce20
upload:  91%|█████████ | 960/1056 [01:53<00:12,  7.88it/s]2025-05-26 13:07:28,771 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fd226dd04ca7.json not detected a

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:28,910 SpawnPoolWorker-39 DEBUG    upload finished in 1.390272s, attributes: file_id=03971e548d56
2025-05-26 13:07:28,910 SpawnPoolWorker-39 DEBUG    upload finished in 1.390804s, attributes: file_id=03971e548d56
upload:  91%|█████████ | 961/1056 [01:53<00:12,  7.66it/s]2025-05-26 13:07:28,913 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2576f0ca614c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:29,019 SpawnPoolWorker-35 DEBUG    upload finished in 1.398547s, attributes: file_id=b5f2de71a33d
2025-05-26 13:07:29,020 SpawnPoolWorker-35 DEBUG    upload finished in 1.399025s, attributes: file_id=b5f2de71a33d
upload:  91%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:29,220 SpawnPoolWorker-40 DEBUG    upload finished in 1.060003s, attributes: file_id=3bb2acdd241f
2025-05-26 13:07:29,220 SpawnPoolWorker-40 DEBUG    upload finished in 1.06051s, attributes: file_id=3bb2acdd241f
upload:  91%|█████████ | 963/1056 [01:54<00:13,  6.85it/s]2025-05-26 13:07:29,222 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c35e51f84805.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:29,424 SpawnPoolWorker-37 DEBUG    upload finished in 0.923965s, attributes: file_id=d7e4a51e9555
2025-05-26 13:07:29,424 SpawnPoolWorker-37 DEBUG    upload finished in 0.92479s, a

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:29,428 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/818b1e43e105.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:29,568 SpawnPoolWorker-41 DEBUG    upload finished in 1.163289s, attributes: file_id=2d30ea8f0676
2025-05-26 13:07:29,568 SpawnPoolWorker-41 DEBUG    upload finished in 1.16392s, attributes: file_id=2d30ea8f0676
upload:  91%|█████████▏| 965/1056 [01:54<00:14,  6.36it/s]2025-05-26 13:07:29,571 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0efafe00f4fb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:29,736 SpawnPoolWorker-34 DEBUG    upload finished in 1.433446s, attributes: file_id=0e4662eebb9f
2025-05-26 13:07:29,736 SpawnPoolWorker-34 DEBUG    upload finished in 1.433959s, attributes: file_id=0e4662eebb9f
upload:  91%|█████████▏| 966/1056 [01:54<00:14,  6.24it/s]2025-05-26 13:07:29,738 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d6787d83aa43.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:29,868 SpawnPoolWorker-39 DEBUG    upload finished in 0.956029s, attributes: file_id=2576f0ca614c
2025-05-26 13:07:29,868 SpawnPoolWorker-39 DEBUG    upload finished in 0.956575s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:29,992 SpawnPoolWorker-36 DEBUG    upload finished in 1.220957s, attributes: file_id=fd226dd04ca7
2025-05-26 13:07:29,992 SpawnPoolWorker-36 DEBUG    upload finished in 1.22146s, attributes: file_id=fd226dd04ca7
upload:  92%|█████████▏| 968/1056 [01:54<00:12,  6.96it/s]2025-05-26 13:07:29,994 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8a83f44c1b01.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:30,165 SpawnPoolWorker-38 DEBUG    upload finished in 1.558309s, attributes: file_id=4532d2fa38f1
2025-05-26 13:07:30,166 SpawnPoolWorker-38 DEBUG    upload finished in 1.558974s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:30,319 SpawnPoolWorker-40 DEBUG    upload finished in 1.097427s, attributes: file_id=c35e51f84805
2025-05-26 13:07:30,319 SpawnPoolWorker-40 DEBUG    upload finished in 1.09793s, attributes: file_id=c35e51f84805
upload:  92%|█████████▏| 970/1056 [01:55<00:13,  6.54it/s]2025-05-26 13:07:30,322 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c7c14fd0a569.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:30,500 SpawnPoolWorker-35 DEBUG    upload finished in 1.479718s, attributes: file_id=ea6e1b037937
2025-05-26 13:07:30,501 SpawnPoolWorker-35 DEBUG    upload finished in 1.480302s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:30,605 SpawnPoolWorker-37 DEBUG    upload finished in 1.178045s, attributes: file_id=818b1e43e105
2025-05-26 13:07:30,605 SpawnPoolWorker-37 DEBUG    upload finished in 1.178858s, attributes: file_id=818b1e43e105
upload:  92%|█████████▏| 972/1056 [01:55<00:12,  6.93it/s]2025-05-26 13:07:30,608 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a8d4c4c1253a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:30,736 SpawnPoolWorker-34 DEBUG    upload finished in 0.998735s, attributes: file_id=d6787d83aa43
2025-05-26 13:07:30,736 SpawnPoolWorker-34 DEBUG    upload finished in 0.99927s, attributes: file_id=d6787d83aa43
upload:  92%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:30,868 SpawnPoolWorker-39 DEBUG    upload finished in 0.998446s, attributes: file_id=cf86e6719f28
2025-05-26 13:07:30,868 SpawnPoolWorker-39 DEBUG    upload finished in 0.99903s, attributes: file_id=cf86e6719f28
upload:  92%|█████████▏| 974/1056 [01:55<00:11,  7.25it/s]2025-05-26 13:07:30,871 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0366b7eda3f4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:31,005 SpawnPoolWorker-38 DEBUG    upload finished in 0.838423s, attributes: file_id=a3823fa5e977
2025-05-26 13:07:31,005 SpawnPoolWorker-38 DEBUG    upload finished in 0.838937s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:31,135 SpawnPoolWorker-40 DEBUG    upload finished in 0.814195s, attributes: file_id=c7c14fd0a569
2025-05-26 13:07:31,135 SpawnPoolWorker-40 DEBUG    upload finished in 0.814719s, attributes: file_id=c7c14fd0a569
upload:  92%|█████████▏| 976/1056 [01:56<00:10,  7.39it/s]2025-05-26 13:07:31,137 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/790d529bf601.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:31,239 SpawnPoolWorker-36 DEBUG    upload finished in 1.245226s, attributes: file_id=8a83f44c1b01
2025-05-26 13:07:31,239 SpawnPoolWorker-36 DEBUG    upload finished in 1.245675s, attributes: file_id=8a83f44c1b01
upload:  93%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:31,359 SpawnPoolWorker-41 DEBUG    upload finished in 1.789249s, attributes: file_id=0efafe00f4fb
2025-05-26 13:07:31,360 SpawnPoolWorker-41 DEBUG    upload finished in 1.789894s, attributes: file_id=0efafe00f4fb
upload:  93%|█████████▎| 978/1056 [01:56<00:09,  8.05it/s]2025-05-26 13:07:31,361 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7943c3b3063d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:31,480 SpawnPoolWorker-37 DEBUG    upload finished in 0.873463s, attributes: file_id=a8d4c4c1253a
2025-05-26 13:07:31,480 SpawnPoolWorker-37 DEBUG    upload finished in 0.874019s, attributes: file_id=a8d4c4c1253a
upload:  93%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:31,618 SpawnPoolWorker-34 DEBUG    upload finished in 0.880227s, attributes: file_id=9968b9eef265
2025-05-26 13:07:31,618 SpawnPoolWorker-34 DEBUG    upload finished in 0.88074s, attributes: file_id=9968b9eef265
upload:  93%|█████████▎| 980/1056 [01:56<00:09,  7.83it/s]2025-05-26 13:07:31,620 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/153f35b465d3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:31,771 SpawnPoolWorker-38 DEBUG    upload finished in 0.763564s, attributes: file_id=b46884a15095
2025-05-26 13:07:31,771 SpawnPoolWorker-38 DEBUG    upload finished in 0.764287s, attributes: file_id=b46884a15095
upload:  93%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:31,948 SpawnPoolWorker-35 DEBUG    upload finished in 1.444784s, attributes: file_id=c020ee9313b9
2025-05-26 13:07:31,948 SpawnPoolWorker-35 DEBUG    upload finished in 1.445412s, attributes: file_id=c020ee9313b9
upload:  93%|█████████▎| 982/1056 [01:56<00:10,  6.77it/s]2025-05-26 13:07:31,950 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/115f987144b2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:32,016 SpawnPoolWorker-36 DEBUG    upload finished in 0.776069s, attributes: file_id=782de5f7d99c
2025-05-26 13:07:32,017 SpawnPoolWorker-36 DEBUG    upload finished in 0.776566s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:32,297 SpawnPoolWorker-40 DEBUG    upload finished in 1.160729s, attributes: file_id=790d529bf601
2025-05-26 13:07:32,298 SpawnPoolWorker-40 DEBUG    upload finished in 1.161214s, attributes: file_id=790d529bf601
upload:  93%|█████████▎| 984/1056 [01:57<00:11,  6.24it/s]2025-05-26 13:07:32,299 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a2720baf2c29.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:32,425 SpawnPoolWorker-39 DEBUG    upload finished in 1.555809s, attributes: file_id=0366b7eda3f4
2025-05-26 13:07:32,426 SpawnPoolWorker-39 DEBUG    upload finished in 1.556374s, attributes: file_id=0366b7eda3f4
upload:  93%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:32,681 SpawnPoolWorker-37 DEBUG    upload finished in 1.199708s, attributes: file_id=c38c295848f5
2025-05-26 13:07:32,682 SpawnPoolWorker-37 DEBUG    upload finished in 1.200185s, attributes: file_id=c38c295848f5
upload:  93%|█████████▎| 986/1056 [01:57<00:12,  5.58it/s]2025-05-26 13:07:32,683 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/053fd2f9fb06.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:32,900 SpawnPoolWorker-38 DEBUG    upload finished in 1.126726s, attributes: file_id=bd9abb0e8a19
2025-05-26 13:07:32,900 SpawnPoolWorker-38 DEBUG    upload finished in 1.127689s, attributes: file_id=bd9abb0e8a19
upload:  93%|█████████▎| 987/1056 [01:57<00:13,  5.26it/s]2025-05-26 13:07:32,903 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/02925da7d964.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:33,089 SpawnPoolWorker-36 DEBUG    upload finished in 1.071453s, attributes: file_id=140f77dbac3f
2025-05-26 13:07:33,090 SpawnPoolWorker-36 DEBUG    upload finished in 1.072117s, attributes: file_id=140f77dbac3f
upload:  94%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:33,221 SpawnPoolWorker-41 DEBUG    upload finished in 1.860448s, attributes: file_id=7943c3b3063d
2025-05-26 13:07:33,221 SpawnPoolWorker-41 DEBUG    upload finished in 1.861033s, attributes: file_id=7943c3b3063d
upload:  94%|█████████▎| 989/1056 [01:58<00:11,  5.77it/s]2025-05-26 13:07:33,224 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c3b53ea5ec1d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:33,350 SpawnPoolWorker-35 DEBUG    upload finished in 1.400395s, attributes: file_id=115f987144b2
2025-05-26 13:07:33,350 SpawnPoolWorker-35 DEBUG    upload finished in 1.401112s, attributes: file_id=115f987144b2
upload:  94%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:33,492 SpawnPoolWorker-34 DEBUG    upload finished in 1.872315s, attributes: file_id=153f35b465d3
2025-05-26 13:07:33,492 SpawnPoolWorker-34 DEBUG    upload finished in 1.872802s, attributes: file_id=153f35b465d3
upload:  94%|█████████▍| 991/1056 [01:58<00:10,  6.45it/s]2025-05-26 13:07:33,494 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/70f7ccdb3aec.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:33,606 SpawnPoolWorker-37 DEBUG    upload finished in 0.92365s, attributes: file_id=053fd2f9fb06
2025-05-26 13:07:33,606 SpawnPoolWorker-37 DEBUG    upload finished in 0.924104s, attributes: file_id=053fd2f9fb06
upload:  94%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:33,745 SpawnPoolWorker-40 DEBUG    upload finished in 1.446062s, attributes: file_id=a2720baf2c29
2025-05-26 13:07:33,745 SpawnPoolWorker-40 DEBUG    upload finished in 1.446572s, attributes: file_id=a2720baf2c29
upload:  94%|█████████▍| 993/1056 [01:58<00:08,  7.06it/s]2025-05-26 13:07:33,747 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1d8b71c0671c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:33,871 SpawnPoolWorker-39 DEBUG    upload finished in 1.443276s, attributes: file_id=41a80c9a1208
2025-05-26 13:07:33,871 SpawnPoolWorker-39 DEBUG    upload finished in 1.443914s, attributes: file_id=41a80c9a1208
upload:  94%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:34,025 SpawnPoolWorker-38 DEBUG    upload finished in 1.122448s, attributes: file_id=02925da7d964
2025-05-26 13:07:34,025 SpawnPoolWorker-38 DEBUG    upload finished in 1.123039s, attributes: file_id=02925da7d964
upload:  94%|█████████▍| 995/1056 [01:59<00:08,  7.03it/s]2025-05-26 13:07:34,028 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/16cf1f72cb35.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:34,166 SpawnPoolWorker-36 DEBUG    upload finished in 1.074594s, attributes: file_id=28f99b43c57f
2025-05-26 13:07:34,166 SpawnPoolWorker-36 DEBUG    upload finished in 1.075122s, attributes: file_id=28f99b43c57f
upload:  94%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:34,297 SpawnPoolWorker-35 DEBUG    upload finished in 0.945204s, attributes: file_id=3fb64c7a0440
2025-05-26 13:07:34,297 SpawnPoolWorker-35 DEBUG    upload finished in 0.945765s, attributes: file_id=3fb64c7a0440
upload:  94%|█████████▍| 997/1056 [01:59<00:08,  7.21it/s]2025-05-26 13:07:34,300 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0e20eb173ba9.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:34,424 SpawnPoolWorker-41 DEBUG    upload finished in 1.201263s, attributes: file_id=c3b53ea5ec1d
2025-05-26 13:07:34,424 SpawnPoolWorker-41 DEBUG    upload finished in 1.20175s, attributes: file_id=c3b53ea5ec1d
upload:  95%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:34,534 SpawnPoolWorker-40 DEBUG    upload finished in 0.787959s, attributes: file_id=1d8b71c0671c
2025-05-26 13:07:34,535 SpawnPoolWorker-40 DEBUG    upload finished in 0.788412s, attributes: file_id=1d8b71c0671c
upload:  95%|█████████▍| 999/1056 [01:59<00:07,  7.83it/s]2025-05-26 13:07:34,537 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/92426fc857cb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:34,702 SpawnPoolWorker-37 DEBUG    upload finished in 1.093992s, attributes: file_id=f603380e7977
2025-05-26 13:07:34,702 SpawnPoolWorker-37 DEBUG    upload finished in 1.094566s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:34,849 SpawnPoolWorker-34 DEBUG    upload finished in 1.356s, attributes: file_id=70f7ccdb3aec
2025-05-26 13:07:34,850 SpawnPoolWorker-34 DEBUG    upload finished in 1.356572s, attributes: file_id=70f7ccdb3aec
upload:  95%|█████████▍| 1001/1056 [01:59<00:07,  7.04it/s]2025-05-26 13:07:34,852 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e6dc8e8655e8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:34,962 SpawnPoolWorker-36 DEBUG    upload finished in 0.794802s, attributes: file_id=09869e47499c
2025-05-26 13:07:34,962 SpawnPoolWorker-36 DEBUG    upload finished in 0.795295s, attributes: file_id=09869e47499c
upload:  95%|████████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'param

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:35,193 SpawnPoolWorker-39 DEBUG    upload finished in 1.320625s, attributes: file_id=005ade0dd5ba
2025-05-26 13:07:35,194 SpawnPoolWorker-39 DEBUG    upload finished in 1.321174s, attributes: file_id=005ade0dd5ba
upload:  95%|█████████▌| 1004/1056 [02:00<00:06,  7.99it/s]2025-05-26 13:07:35,195 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/673879e45694.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:35,331 SpawnPoolWorker-38 DEBUG    upload finished in 1.304277s, attributes: file_id=16cf1f72cb35
2025-05-26 13:07:35,331 SpawnPoolWorker-38 DEBUG    upload finished in 1.304758s

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:35,472 SpawnPoolWorker-40 DEBUG    upload finished in 0.936039s, attributes: file_id=92426fc857cb
2025-05-26 13:07:35,472 SpawnPoolWorker-40 DEBUG    upload finished in 0.936488s, attributes: file_id=92426fc857cb
upload:  95%|█████████▌| 1006/1056 [02:00<00:06,  7.60it/s]2025-05-26 13:07:35,474 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bafd2d1ff737.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:35,600 SpawnPoolWorker-37 DEBUG    upload finished in 0.896718s, attributes: file_id=4672c1860e4a
2025-05-26 13:07:35,601 SpawnPoolWorker-37 DEBUG    upload finished in 0.897299s, attributes: file_id=4672c1860e4a
upload:  95%|█████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:35,754 SpawnPoolWorker-41 DEBUG    upload finished in 1.328353s, attributes: file_id=7fccc6e4983e
2025-05-26 13:07:35,754 SpawnPoolWorker-41 DEBUG    upload finished in 1.328821s, attributes: file_id=7fccc6e4983e
upload:  95%|█████████▌| 1008/1056 [02:00<00:06,  7.29it/s]2025-05-26 13:07:35,757 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/62d5fef55413.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:35,814 SpawnPoolWorker-34 DEBUG    upload finished in 0.96282s, attributes: file_id=e6dc8e8655e8
2025-05-26 13:07:35,814 SpawnPoolWorker-34 DEBUG    upload finished in 0.963365s, attributes: file_id=e6dc8e8655e8
2025-05-26 13:07:35

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:35,993 SpawnPoolWorker-36 DEBUG    upload finished in 1.0288s, attributes: file_id=9a601b572f32
2025-05-26 13:07:35,993 SpawnPoolWorker-36 DEBUG    upload finished in 1.029451s, attributes: file_id=9a601b572f32
upload:  96%|█████████▌| 1010/1056 [02:00<00:05,  7.74it/s]2025-05-26 13:07:35,995 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8cecb8c96ef7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:36,152 SpawnPoolWorker-39 DEBUG    upload finished in 0.956776s, attributes: file_id=673879e45694
2025-05-26 13:07:36,152 SpawnPoolWorker-39 DEBUG    upload finished in 0.957224s, attributes: file_id=673879e45694
upload:  96%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:36,253 SpawnPoolWorker-38 DEBUG    upload finished in 0.92092s, attributes: file_id=96d8d7883aa0
2025-05-26 13:07:36,254 SpawnPoolWorker-38 DEBUG    upload finished in 0.921378s, attributes: file_id=96d8d7883aa0
upload:  96%|█████████▌| 1012/1056 [02:01<00:05,  7.84it/s]2025-05-26 13:07:36,255 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8deeebff78fd.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:36,371 SpawnPoolWorker-40 DEBUG    upload finished in 0.897794s, attributes: file_id=bafd2d1ff737
2025-05-26 13:07:36,371 SpawnPoolWorker-40 DEBUG    upload finished in 0.898291s, attributes: file_id=bafd2d1ff737
upload:  96%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:36,471 SpawnPoolWorker-37 DEBUG    upload finished in 0.868539s, attributes: file_id=6d4d881bb905
2025-05-26 13:07:36,471 SpawnPoolWorker-37 DEBUG    upload finished in 0.869181s, attributes: file_id=6d4d881bb905
2025-05-26 13:07:36,473 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0acb5c5403e8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:36,602 SpawnPoolWorker-34 DEBUG    upload finished in 0.786262s, attributes: file_id=4f1dd08f7cd9
2025-05-26 13:07:36,603 SpawnPoolWorker-34 DEBUG    upload finished in 0.787143s, attributes: file_id=4f1dd08f7cd9
upload:  96%|█████████▌|

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:36,768 SpawnPoolWorker-36 DEBUG    upload finished in 0.773348s, attributes: file_id=8cecb8c96ef7
2025-05-26 13:07:36,768 SpawnPoolWorker-36 DEBUG    upload finished in 0.77381s, attributes: file_id=8cecb8c96ef7
upload:  96%|█████████▌| 1016/1056 [02:01<00:05,  7.61it/s]2025-05-26 13:07:36,769 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7748edb5e10b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:36,891 SpawnPoolWorker-35 DEBUG    upload finished in 1.855545s, attributes: file_id=bf302c847496
2025-05-26 13:07:36,892 SpawnPoolWorker-35 DEBUG    upload finished in 1.856181s, attributes: file_id=bf302c847496
upload:  96%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:37,007 SpawnPoolWorker-41 DEBUG    upload finished in 1.251088s, attributes: file_id=62d5fef55413
2025-05-26 13:07:37,007 SpawnPoolWorker-41 DEBUG    upload finished in 1.251589s, attributes: file_id=62d5fef55413
upload:  96%|█████████▋| 1018/1056 [02:01<00:04,  7.96it/s]2025-05-26 13:07:37,009 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b420a2e0c8ea.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:37,128 SpawnPoolWorker-40 DEBUG    upload finished in 0.754966s, attributes: file_id=26c08148f5fd
2025-05-26 13:07:37,128 SpawnPoolWorker-40 DEBUG    upload finished in 0.755443s, attributes: file_id=26c08148f5fd
upload:  96%|█████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:37,269 SpawnPoolWorker-37 DEBUG    upload finished in 0.797045s, attributes: file_id=0acb5c5403e8
2025-05-26 13:07:37,270 SpawnPoolWorker-37 DEBUG    upload finished in 0.797582s, attributes: file_id=0acb5c5403e8
upload:  97%|█████████▋| 1020/1056 [02:02<00:04,  7.73it/s]2025-05-26 13:07:37,271 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/01d552e519b2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:37,391 SpawnPoolWorker-34 DEBUG    upload finished in 0.786044s, attributes: file_id=acc65a21ba99
2025-05-26 13:07:37,391 SpawnPoolWorker-34 DEBUG    upload finished in 0.786749s, attributes: file_id=acc65a21ba99
upload:  97%|█████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:37,579 SpawnPoolWorker-39 DEBUG    upload finished in 1.426106s, attributes: file_id=6c58f37dca21
2025-05-26 13:07:37,580 SpawnPoolWorker-39 DEBUG    upload finished in 1.426599s, attributes: file_id=6c58f37dca21
upload:  97%|█████████▋| 1022/1056 [02:02<00:04,  6.90it/s]2025-05-26 13:07:37,581 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/03f7a20aae1a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:37,700 SpawnPoolWorker-36 DEBUG    upload finished in 0.930941s, attributes: file_id=7748edb5e10b
2025-05-26 13:07:37,700 SpawnPoolWorker-36 DEBUG    upload finished in 0.931502s, attributes: file_id=7748edb5e10b
upload:  97%|█████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:37,871 SpawnPoolWorker-41 DEBUG    upload finished in 0.862637s, attributes: file_id=b420a2e0c8ea
2025-05-26 13:07:37,871 SpawnPoolWorker-41 DEBUG    upload finished in 0.86304s, attributes: file_id=b420a2e0c8ea
upload:  97%|█████████▋| 1024/1056 [02:02<00:04,  6.78it/s]2025-05-26 13:07:37,873 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2102e74838f7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:38,019 SpawnPoolWorker-38 DEBUG    upload finished in 1.764263s, attributes: file_id=8deeebff78fd
2025-05-26 13:07:38,019 SpawnPoolWorker-38 DEBUG    upload finished in 1.7647s, attributes: file_id=8deeebff78fd
upload:  97%|████████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:38,173 SpawnPoolWorker-37 DEBUG    upload finished in 0.901887s, attributes: file_id=01d552e519b2
2025-05-26 13:07:38,173 SpawnPoolWorker-37 DEBUG    upload finished in 0.90231s, attributes: file_id=01d552e519b2
upload:  97%|█████████▋| 1026/1056 [02:03<00:04,  6.69it/s]2025-05-26 13:07:38,175 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b20c3d251eba.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:38,269 SpawnPoolWorker-40 DEBUG    upload finished in 1.139607s, attributes: file_id=ac11171923a1
2025-05-26 13:07:38,269 SpawnPoolWorker-40 DEBUG    upload finished in 1.140137s, attributes: file_id=ac11171923a1
2025-05-26 13:07:38

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:38,401 SpawnPoolWorker-34 DEBUG    upload finished in 1.00806s, attributes: file_id=13c137ebd4aa
2025-05-26 13:07:38,401 SpawnPoolWorker-34 DEBUG    upload finished in 1.008663s, attributes: file_id=13c137ebd4aa
upload:  97%|█████████▋| 1028/1056 [02:03<00:03,  7.51it/s]2025-05-26 13:07:38,403 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6a54f1cd3079.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:38,534 SpawnPoolWorker-39 DEBUG    upload finished in 0.953695s, attributes: file_id=03f7a20aae1a
2025-05-26 13:07:38,535 SpawnPoolWorker-39 DEBUG    upload finished in 0.954234s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:38,664 SpawnPoolWorker-36 DEBUG    upload finished in 0.962025s, attributes: file_id=1abf7e180021
2025-05-26 13:07:38,664 SpawnPoolWorker-36 DEBUG    upload finished in 0.962623s, attributes: file_id=1abf7e180021
upload:  98%|█████████▊| 1030/1056 [02:03<00:03,  7.56it/s]2025-05-26 13:07:38,667 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0f00ad010b68.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:38,793 SpawnPoolWorker-38 DEBUG    upload finished in 0.77296s, attributes: file_id=4bdba5812315
2025-05-26 13:07:38,794 SpawnPoolWorker-38 DEBUG    upload finished in 0.773541s, attributes: file_id=4bdba5812315
upload:  98%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:39,045 SpawnPoolWorker-35 DEBUG    upload finished in 2.152294s, attributes: file_id=39e8dfc6bb82
2025-05-26 13:07:39,046 SpawnPoolWorker-35 DEBUG    upload finished in 2.152953s, attributes: file_id=39e8dfc6bb82
upload:  98%|█████████▊| 1032/1056 [02:04<00:03,  6.05it/s]2025-05-26 13:07:39,049 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2f2398718f82.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:39,228 SpawnPoolWorker-41 DEBUG    upload finished in 1.355527s, attributes: file_id=2102e74838f7
2025-05-26 13:07:39,229 SpawnPoolWorker-41 DEBUG    upload finished in 1.356874s

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:39,404 SpawnPoolWorker-37 DEBUG    upload finished in 1.229533s, attributes: file_id=b20c3d251eba
2025-05-26 13:07:39,404 SpawnPoolWorker-37 DEBUG    upload finished in 1.230029s, attributes: file_id=b20c3d251eba
upload:  98%|█████████▊| 1034/1056 [02:04<00:03,  5.82it/s]2025-05-26 13:07:39,406 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bfc7e681bef3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:39,529 SpawnPoolWorker-39 DEBUG    upload finished in 0.992926s, attributes: file_id=2a5023d92043
2025-05-26 13:07:39,530 SpawnPoolWorker-39 DEBUG    upload finished in 0.993655s, attributes: file_id=2a5023d92043
upload:  98%|█████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:39,677 SpawnPoolWorker-34 DEBUG    upload finished in 1.274478s, attributes: file_id=6a54f1cd3079
2025-05-26 13:07:39,677 SpawnPoolWorker-34 DEBUG    upload finished in 1.274994s, attributes: file_id=6a54f1cd3079
upload:  98%|█████████▊| 1036/1056 [02:04<00:03,  6.45it/s]2025-05-26 13:07:39,679 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/41117e10b408.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 13:07:39,894 SpawnPoolWorker-40 DEBUG    upload finished in 1.623636s, attributes: file_id=c1467d8096ef
2025-05-26 13:07:39,894 SpawnPoolWorker-40 DEBUG    upload finished in 1.62435s, attributes: file_id=c1467d8096ef
upload:  98%|█████████▊| 1037/1056 [02:04<00:03,  5.76it/s]2025-05-26 13:07:39,897 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fd49cff7eb69.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:40,057 SpawnPoolWorker-35 DEBUG    upload finished in 1.009893s, attributes: file_id=2f2398718f82
2025-05-26 13:07:40,058 SpawnPoolWorker-35 DEBUG    upload finished in 1.010714s, attributes: file_id=2f2398718f82
upload:  98%|█████████▊| 1038/1056 [02:05<00:03,  5.87it/s]2025-05-26 13

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:40,215 SpawnPoolWorker-36 DEBUG    upload finished in 1.54939s, attributes: file_id=0f00ad010b68
2025-05-26 13:07:40,215 SpawnPoolWorker-36 DEBUG    upload finished in 1.549873s, attributes: file_id=0f00ad010b68
upload:  98%|█████████▊| 1039/1056 [02:05<00:02,  6.01it/s]2025-05-26 13:07:40,217 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d187f9625869.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:40,354 SpawnPoolWorker-38 DEBUG    upload finished in 1.559106s, attributes: file_id=e6a86f560587
2025-05-26 13:07:40,355 SpawnPoolWorker-38 DEBUG    upload finished in 1.559619s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 13:07:40,499 SpawnPoolWorker-41 DEBUG    upload finished in 1.266735s, attributes: file_id=62e68eae5613
2025-05-26 13:07:40,499 SpawnPoolWorker-41 DEBUG    upload finished in 1.267405s, attributes: file_id=62e68eae5613
upload:  99%|█████████▊| 1041/1056 [02:05<00:02,  6.48it/s]2025-05-26 13:07:40,501 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e47a129ad5f4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:40,624 SpawnPoolWorker-37 DEBUG    upload finished in 1.219081s, attributes: file_id=bfc7e681bef3
2025-05-26 13:07:40,625 SpawnPoolWorker-37 DEBUG    upload finished in 1.219586s

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 13:07:40,751 SpawnPoolWorker-39 DEBUG    upload finished in 1.219914s, attributes: file_id=da6bf0451ff0
2025-05-26 13:07:40,751 SpawnPoolWorker-39 DEBUG    upload finished in 1.220438s, attributes: file_id=da6bf0451ff0
upload:  99%|█████████▉| 1043/1056 [02:05<00:01,  7.14it/s]2025-05-26 13:07:40,753 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9eb85e469c3e.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:40,907 SpawnPoolWorker-34 DEBUG    upload finished in 1.228511s, attributes: file_id=41117e10b408
2025-05-26 13:07:40,907 SpawnPoolWorker-34 DEBUG    upload finished in 1.229097s

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 13:07:41,030 SpawnPoolWorker-40 DEBUG    upload finished in 1.133682s, attributes: file_id=fd49cff7eb69
2025-05-26 13:07:41,031 SpawnPoolWorker-40 DEBUG    upload finished in 1.134498s, attributes: file_id=fd49cff7eb69
upload:  99%|█████████▉| 1045/1056 [02:06<00:01,  7.23it/s]2025-05-26 13:07:41,033 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/872fc6806708.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 13:07:41,144 SpawnPoolWorker-38 DEBUG    upload finished in 0.787877s, attributes: file_id=d7a1438c847c
2025-05-26 13:07:41,144 SpawnPoolWorker-38 DEBUG    upload finished in 0.788385s, attributes: file_id=d7a1438c847c
upload:  99%|█████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 13:07:41,275 SpawnPoolWorker-41 DEBUG    upload finished in 0.774965s, attributes: file_id=e47a129ad5f4
2025-05-26 13:07:41,275 SpawnPoolWorker-41 DEBUG    upload finished in 0.775429s, attributes: file_id=e47a129ad5f4
upload:  99%|█████████▉| 1047/1056 [02:06<00:01,  7.63it/s]2025-05-26 13:07:41,277 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4bc82a50626e.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 13:07:41,420 SpawnPoolWorker-35 DEBUG    upload finished in 1.360673s, attributes: file_id=c105c82d32d2
2025-05-26 13:07:41,421 SpawnPoolWorker-35 DEBUG    upload finished in 1.361179s, attributes: file_id=c105c82d32d2
upload:  99%|█████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'cn-north-1-aws.api.clickzetta.com', 'username': 'qiliang', 'instance': 'b10c4ac3', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 13:07:41,546 SpawnPoolWorker-36 DEBUG    upload finished in 1.329596s, attributes: file_id=d187f9625869
2025-05-26 13:07:41,546 SpawnPoolWorker-36 DEBUG    upload finished in 1.330004s, attributes: file_id=d187f9625869
upload:  99%|█████████▉| 1049/1056 [02:06<00:00,  7.56it/s]Removed trailing semicolon and whitespace from query
2025-05-26 13:07:41,696 SpawnPoolWorker-37 DEBUG    upload finished in 1.069503s, attributes: file_id=dfdd41863870
2025-05-26 13:07:41,696 SpawnPoolWorker-37 DEBUG    upload finished in 1.070052s, attributes: file_id=dfdd41863870
upload:  99%|█████████▉| 1050/1056 [02:06<00:00,  7.27it/s]2025-05-26 13:07:41,866 SpawnPoolWorker-40 DEBUG    upload finished in 0.833577s, attributes: file_id=872fc6806708
2025-05-26 13:07:41,869 SpawnPoolWorker-40 DEBUG    upload finished in 0.836634s, attributes: file_id=872fc6806708
upload: 100%|█████████▉| 1051/1056 [02:06<00:00,  6.73it/s]2025-05-26 13:07:42,001 SpawnPoolWorker-39 DEBUG    upload finished in 1.248176s

In [None]:
# DISABLED CELL: everything below is commented out, so running this cell does nothing.
# The commented-out code describes an ingestion pipeline that:
#   * indexes `**/*.md` files recursively from the S3 location in the AWS_S3_NAME env var,
#   * partitions them locally (partition_by_api=False) with the "hi_res" strategy,
#   * chunks "by_title" using chunk_max_characters / chunk_overlap,
#   * embeds with embedding_provider / embedding_model_name,
#   * uploads the result to `raw_table_name` through the Clickzetta connector.
# NOTE(review): presumably kept as a variant of the pipeline run in an earlier cell
# (this one uses num_processes=20) — confirm whether it is still needed, otherwise delete it.
# pipeline = Pipeline.from_configs(

#     context=ProcessorConfig(
#         verbose=True,
#         tqdm=True,
#         num_processes=20,
#     ),

#     indexer_config=S3IndexerConfig(remote_url=os.getenv("AWS_S3_NAME"), recursive=True, file_glob="**/*.md" ),
#     downloader_config=S3DownloaderConfig(),
#     source_connection_config=S3ConnectionConfig(
#         access_config=S3AccessConfig(
#             key=os.getenv("AWS_KEY"),
#             secret=os.getenv("AWS_SECRET"))
#     ),

#     partitioner_config=PartitionerConfig(
#         partition_by_api=False,
#         api_key=os.getenv("UNSTRUCTURED_API_KEY"),
#         partition_endpoint=os.getenv("UNSTRUCTURED_URL"),
#         strategy="hi_res",
#         additional_partition_args={
#             "split_pdf_page": True,
#             "split_pdf_allow_failed": True,
#             "split_pdf_concurrency_level": 15
#         }
#     ),

#     chunker_config=ChunkerConfig(
#         chunking_strategy="by_title",
#         chunk_max_characters=chunk_max_characters,
#         chunk_overlap=chunk_overlap,
#         chunk_combine_text_under_n_chars=200,
#     ),

#     embedder_config=EmbedderConfig(
#         embedding_provider = embedding_provider,
#         embedding_model_name = embedding_model_name,
#     ),

#     destination_connection_config=ClickzettaConnectionConfig(
#         access_config=ClickzettaAccessConfig(password=_password),
#         username=_username,
#         service=_service,
#         instance=_instance,
#         workspace=_workspace,
#         schema=_schema,
#         vcluster=_vcluster,
#     ),
#     stager_config=ClickzettaUploadStagerConfig(),
#     uploader_config=ClickzettaUploaderConfig(table_name=raw_table_name, documents_original_source="https://yunqi.tech/documents"),
# )

# pipeline.run()

### Clean/Transform the RAW table and insert into the Silver table

In [15]:
# Runs the statement held in `clean_transformation_data_sql` (defined outside this cell).
# You could execute more SQL statements here to clean and transform the data
# before inserting it into the Silver table.
excute_sql(conn, clean_transformation_data_sql)

[['OPERATION SUCCEED']]

### Retrieve relevant documents from Singdata Lakehouse


In [None]:
from sentence_transformers import SentenceTransformer


# Loaded SentenceTransformer instances, keyed by model name, so that the model
# is instantiated once instead of on every get_embedding() call.
_embedding_model_cache = {}


def get_embedding(query):
    """Encode ``query`` with the model named by ``embedding_model_name``.

    The model is created on first use and then reused from
    ``_embedding_model_cache``; changing ``embedding_model_name`` later simply
    adds a second cache entry.

    Args:
        query: Input passed straight to ``SentenceTransformer.encode``.

    Returns:
        Whatever ``model.encode(query, normalize_embeddings=True)`` returns.
    """
    model = _embedding_model_cache.get(embedding_model_name)
    if model is None:
        model = SentenceTransformer(embedding_model_name)
        _embedding_model_cache[embedding_model_name] = model
    return model.encode(query, normalize_embeddings=True)

def retrieve_documents(conn, query: str, num_results: int = 10):
    """Return the rows of the silver table whose embeddings are closest to ``query``.

    The query text is embedded with ``get_embedding`` and the rows of
    ``silver_table_name`` are ranked by ``cosine_distance`` between their
    ``embeddings`` column and the query vector, smallest distance first.

    Args:
        conn: Connection object; only ``conn.cursor()`` (used as a context
            manager) is required.
        query: Text to search for.
        num_results: Maximum number of rows to return. Coerced with ``int()``
            so that only a number can end up in the ``LIMIT`` clause.

    Returns:
        A ``pandas.DataFrame`` whose columns are taken from the cursor
        description: ``retrieve_method``, ``record_locator``, ``type``,
        ``filename``, ``text``, ``orig_elements`` and ``score``.

    Raises:
        TypeError, ValueError: if ``num_results`` cannot be converted to ``int``.
    """
    # The vector is inlined as a literal: a list of Python floats formats as
    # "[v1, v2, ...]", which is the text the cast(... as vector(n)) receives.
    embedding_list = get_embedding(query).tolist()
    limit = int(num_results)

    stmt = f"""
        WITH
        vector_embedding_result AS (
        SELECT
            "vector_embedding" as retrieve_method,
            record_locator,
            type,
            filename,
            text,
            orig_elements,
            cosine_distance(embeddings, cast({embedding_list} as vector({embeddings_dimensions}))) AS score
        FROM {silver_table_name}
        ORDER BY score ASC
        LIMIT {limit}
        )
        SELECT    *  FROM      vector_embedding_result

        ORDER by score ASC;
    """

    with conn.cursor() as cur:
        cur.execute(stmt)
        results = cur.fetchall()
        # Column names come from the first field of each cursor description entry.
        columns = [desc[0] for desc in cur.description]

    return pd.DataFrame(results, columns=columns)

: 

In [26]:
# Sample question (Chinese): "What is the syntax for creating an index?"
query_text = "创建索引的语法是什么？"
# num_results is left at retrieve_documents' default.
retrieve_documents_df = retrieve_documents(conn, query_text)
# Bare last expression so the notebook renders the result frame.
retrieve_documents_df

Unnamed: 0,retrieve_method,record_locator,type,filename,text,orig_elements,score
0,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,build-inverted-index.md,构建索引\n\n对存量数据添加索引。目前只支持向量索引和倒排索引。布隆过滤器不支持\n\n语...,eJztWNtuGzcQ/ZXtvuTFF5LLq1P3IYiBpgjSW4wWiA2Bl6...,0.273337
1,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,create-inverted-index.md,创建倒排索引\n\n具体介绍参考倒排索引介绍\n\n语法\n\nSQL CREATE TAB...,eJztXNtuG0cS/RWCL3YAX/p+8WIfEluLFeDYWUveDWALRF...,0.294409
2,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,create-vector-index.md,创建向量索引\n\n语法\n\nSQL CREATE TABLE table_name( c...,eJztXOlvGzcW/1cIfVjYgGPxPnoB3dTYBmiTbuJuF6gDg6...,0.326366
3,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,create-vector-index.md,已有的表增加向量索引\n\n语法\n\nSQL CREATE VECTOR INDEX [I...,eJztmN1v2zYQwP8VQU8JkA9+f6TYQ9d5QIAi6RJ3KBAHLk...,0.331133
4,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,inverted-index.md,案例\n\nSQL CREATE TABLE inverted_index_test( id...,eJztXW2TEzcS/itz/rJQ4UXvLZHjqpKwqXBFgIPlkiqgHL...,0.340637
5,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,inverted-index.md,倒排索引\n\n【预览发布】本功能当前处于公开预览发布阶段。\n\n倒排索引原理介绍\n\n...,eJztXelvHDeW/1d69SkBkpj3YUwW8E68OwYcZzZWZrGIAo...,0.346242
6,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,CREATE-BLOOMFILTER-INDEX.md,创建BLOOMFILTER索引\n\n功能\n\n布隆过滤器（Bloom Filter）是一...,eJztW1lPHEkS/iutfmFGGkzeh6V5GHuwFgmbkc1oRwLUyi...,0.355196
7,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,create-synonym.md,功能\n\n创建同义词，同义词synonym是一个数据库对象，类似给对象起一个别名。支持为以...,eJztW1lv28oV/iuEXm4fknj2xUEf3MQXMBDbbeLcIk0MZZ...,0.367206
8,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,show-create-table.md,功能\n\nSHOW CREATE TABLE 命令用于获取指定表、物化视图或视图的创建语句...,eJztW21TW7kV/it3/CVkFozeX9JpZ2jibZhJYAukmXbJ2J...,0.370698
9,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,create.md,功能描述\n\n创建指定类型的对象\n\n语法\n\nCREATE <object_type...,eJztmllvGzcQx7+KoJe+1DHvIygCKIrSCJAlwJLTpIkhkM...,0.375958


In [27]:
# Print the full `text` value of one retrieved chunk.
# NOTE(review): despite the name `first_row__text`, `.iloc[4]` selects the row at
# position 4 (the fifth result), not the first one — confirm which row is intended.
first_row__text = retrieve_documents_df.iloc[4]['text']
print(first_row__text)

案例

SQL CREATE TABLE inverted_index_test( id int, name string, INDEX id_index (id) INVERTED , INDEX name_index (name) INVERTED PROPERTIES('analyzer'='keyword','mode' = 'smart|max_word') );

已有的表增加倒排索引

语法

SQL CREATE INVERTED INDEX [IF NOT EXISTS] index_name ON TABLE [schema].table_name(col_name) [COMMENT 'comment'] PROPERTIES('analyzer'='english｜chinese|keyword｜unicode','mode' = 'smart|max_word')

INVERTED: 索引类型，倒排索引

index_name: 表名字，位于schema下，schema下索引名称不能重复

col_name：列名只支持单列

PROPERTIES：指定INDEX的参数，支持的属性目前支持指定分词。数值和日期类型则不需要指定properties，如果是字符串类型要求必须指定分词

说明

执行CREATE INDEX仅对新增数据有效，对已有数据进行索引请使用BUILD INDEX命令。

案例

```SQL CREATE TABLE inverted_index_test( id int, name string );

CREATE INVERTED INDEX id_index ON TABLE public.inverted_index_test(name) PROPERTIES('analyzer'='unicode') ```

使用倒排索引查询

构建索引

对存量数据添加倒排索引

语法

SQL -- 语法 1，默认给全表的存量数据加上倒排索引 BUILD INDEX index_name ON [schema].table_name; -- 语法 2，可指定partition，可指定一个或多个,支持=, !=, >, >=, <, <= BUILD INDEX index_name ON table_name WHERE

In [28]:
# Take the filename of the row at position 0 of the retrieval result and load
# every silver-table row that belongs to that file.
filename = retrieve_documents_df.iloc[0]['filename']

# NOTE(review): `filename` is interpolated into the statement as a double-quoted
# literal; a filename containing a double quote would break the query — confirm
# whether the connector supports bound parameters and switch to them if so.
stmt = f"""
            WITH 
            results AS (
            SELECT
                record_locator,
                type,
                filename,
                text,
                orig_elements
            FROM {silver_table_name}
            WHERE filename = "{filename}"
            )
            SELECT    *  FROM      results;
        """

with conn.cursor() as cur:
    cur.execute(stmt)

    results = cur.fetchall()
    columns = [desc[0] for desc in cur.description]  # Get column names from cursor description
    df = pd.DataFrame(results, columns=columns)
df

Unnamed: 0,record_locator,type,filename,text,orig_elements
0,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,build-inverted-index.md,构建索引\n\n对存量数据添加索引。目前只支持向量索引和倒排索引。布隆过滤器不支持\n\n语...,eJztWNtuGzcQ/ZXtvuTFF5LLq1P3IYiBpgjSW4wWiA2Bl6...


In [29]:
# Glue the `text` of every row in `df` together, with no separator, and print it.
all_text = "".join(df["text"].astype(str))
print(all_text)

构建索引

对存量数据添加索引。目前只支持向量索引和倒排索引。布隆过滤器不支持

语法

SQL -- 语法 1，默认给全表的存量数据加上索引 BUILD INDEX index_name ON [schema].table_name; -- 语法 2，可指定partition，可指定一个或多个,支持=, !=, >, >=, <, <= BUILD INDEX index_name ON table_name WHERE partition_name1 = '1' and partition_name2 = '2';

index_name：指定要添加索引名称

支持指定分区构建：可以指定一个或者多个

说明

执行BULD INDEX是一个同步任务，执行过程会消耗计算资源。查看进度可以通过Job Profile查看。

当分区表数据量较大时，建议以分区为粒度依次创建索引。

案例

SQL BUILD INDEX bulkload_data_index ON public.bulkload_data ;


In [19]:
# Hand-written knowledge snippet (Chinese) describing how the names ClickZetta,
# 云器 (Yunqi) and Singdata relate to each other.
# Raw string: the text contains literal backslashes ("\云", "\S"), which are
# invalid escape sequences in a normal string literal. The resulting value is
# unchanged.
kb = r"ClickZetta\云器\Singdata之间的关系:ClickZetta是云器、Singdata的技术品牌。云器是ClickZetta的中国商业公司，Singdata是ClickZetta的海外商业公司。所以在不区分商业和技术的时候，三者是等同的。另外云器经常也会用到Yunqi的汉语拼音名称。"

# Embed the snippet and convert it to a plain list so it formats as "[v1, v2, ...]".
embedded_kb = get_embedding(kb).tolist()

# The target is qualified with the configured workspace and schema instead of
# hard-coded names, so the cell follows the connection settings from the .env file.
# NOTE(review): `kb` is placed between single quotes as-is; a text containing a
# single quote would end the SQL literal early.
add_kb_sql = f"""
INSERT INTO {_workspace}.{_schema}.{silver_table_name} (
  id, type, record_id, element_id, filetype, last_modified, languages, text, embeddings, date_created, date_modified, date_processed
) VALUES (
  uuid(), 'UserInput', uuid(), uuid(), 'text', CURRENT_TIMESTAMP, '["zh-cn"]',
  '{kb}',
  CAST('{embedded_kb}' AS vector(float,{embeddings_dimensions})), CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
);
"""

In [20]:
# Execute the INSERT statement held in `add_kb_sql` on a cursor of `conn`.
with conn.cursor() as cur:
        cur.execute(add_kb_sql)