# Transforming Unstructured Data from an AWS S3 bucket into RAG-Ready Data in Singdata Lakehouse

In [1]:
import json
import pandas as pd
import logging
import warnings

# Show only ERROR-level (and above) log records; force=True replaces any
# handlers that are already attached to the root logger.
logging.basicConfig(level=logging.ERROR, force=True)
# Suppress UserWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=UserWarning)

# Set drop_tables to True to drop tables before new data is written.
# NOTE(review): the only active DROP statement in this notebook targets the raw
# table; the DROP for the silver table is commented out.
drop_tables = True

In [2]:
import os
import dotenv

# Load the variables defined in the .env file into the process environment so
# that the os.getenv(...) calls in later cells can read them.
dotenv.load_dotenv('./.env') # replace with the path to your .env file

True

In [3]:
# Table names plus chunking/embedding settings used throughout the notebook.
#
# Alternative preset (kept for reference, not active):
#   index_and_table_prefix = "base_512_"
#   raw_table_name = f"{index_and_table_prefix}yunqi_raw_elements"
#   silver_table_name = f"{index_and_table_prefix}yunqi_elements"
#   embeddings_dimensions = 768
#   chunk_max_characters = 512
#   chunk_overlap = 200
#   embedding_provider = "huggingface"
#   embedding_model_name = "BAAI/bge-base-zh-v1.5"

# Active preset. The prefix is shared by both tables and by the index names
# created on the silver table.
index_and_table_prefix = "m3_1024_2048_"
raw_table_name = index_and_table_prefix + "yunqi_raw_elements"
silver_table_name = index_and_table_prefix + "yunqi_elements"

# Embedding model and the vector width declared for the `embeddings` columns.
embedding_provider = "huggingface"
embedding_model_name = "BAAI/bge-m3"
embeddings_dimensions = 1024

# Chunking limits, in characters.
chunk_max_characters = 2048
chunk_overlap = 512


In [4]:
# Read the Singdata Lakehouse connection parameters from environment variables.
# os.getenv returns None for any variable that is not set, so a missing entry
# only surfaces later, when the value is actually used.
_username = os.getenv("cz_username")
_password = os.getenv("cz_password")
_service = os.getenv("cz_service")
_instance = os.getenv("cz_instance")
_workspace = os.getenv("cz_workspace")
# The schema name is also interpolated into the DDL/SQL strings defined below.
_schema = os.getenv("cz_schema")
_vcluster = os.getenv("cz_vcluster")

In [5]:
# DDL for the "raw" landing table that the ingestion pipeline uploads into.
# The statement is schema-qualified with _schema and sized with
# embeddings_dimensions, so both must be set before this cell runs.
# The trailing comma after the last column has been removed: a comma directly
# before the closing parenthesis is not valid in standard SQL.
# NOTE(review): the inline SQL comments look inherited from a template, e.g.
# `id` is declared STRING although its comment says "Auto-increment sequence".
raw_table_ddl = f"""
CREATE TABLE IF NOT EXISTS {_schema}.{raw_table_name} (
    id STRING, -- Auto-increment sequence
    record_locator STRING,
    type STRING,
    record_id STRING, -- Record identifier from the data source (e.g., record locator in connector metadata)
    element_id STRING, -- Unique identifier for the element (SHA-256 or UUID)
    filetype STRING, -- File type (e.g., PDF, DOCX, EML, etc.)
    file_directory STRING, -- Directory where the file is located
    filename STRING, -- File name
    last_modified TIMESTAMP, -- Last modified time of the file
    languages STRING, -- Document language, supports a list of multiple languages
    page_number STRING, -- Page number (applicable for PDF, DOCX, etc.)
    text STRING, -- Extracted text content
    embeddings VECTOR({embeddings_dimensions}), -- Vector data
    parent_id STRING, -- Parent element ID, used to represent element hierarchy
    is_continuation BOOLEAN, -- Whether it is a continuation of the previous element (used in chunking)
    orig_elements STRING, -- Original element in JSON format (used to store the complete element structure)
    element_type STRING, -- Element type (e.g., NarrativeText, Title, Table, etc.)
    coordinates STRING, -- Element coordinates (stored in JSONB format)
    link_texts STRING, -- Added field: Link text
    link_urls STRING, -- Added field: Link URL
    email_message_id STRING, -- Added field: Email message ID
    sent_from STRING, -- Added field: Sender
    sent_to STRING, -- Added field: Recipient
    subject STRING, -- Added field: Subject
    url STRING, -- Added field: URL
    version STRING, -- Added field: Version
    date_created TIMESTAMP, -- Added field: Creation date
    date_modified TIMESTAMP, -- Added field: Modification date
    date_processed TIMESTAMP, -- Added field: Processing date
    text_as_html STRING, -- Added field: Text in HTML format
    emphasized_text_contents STRING,
    emphasized_text_tags STRING,
    documents_original_source STRING -- Added field: Document source
);
"""

# DDL for the "silver" table that holds the query-ready elements.
# Its column list matches the raw table except for the last column, which is
# named documents_source here (documents_original_source in the raw table).
# Two indexes are declared, both prefixed with index_and_table_prefix:
#   - an inverted index on `text` using the 'unicode' analyzer
#   - a vector index on `embeddings` using f32 scalars and cosine distance
# NOTE(review): the inline SQL comments look inherited from a template, e.g.
# `id` is declared STRING although its comment says "Auto-increment sequence".
silver_table_ddl = f"""
CREATE TABLE IF NOT EXISTS {_schema}.{silver_table_name} (
    id STRING, -- Auto-increment sequence
    record_locator STRING,
    type STRING,
    record_id STRING, -- Record identifier from the data source (e.g., record locator in connector metadata)
    element_id STRING, -- Unique identifier for the element (SHA-256 or UUID)
    filetype STRING, -- File type (e.g., PDF, DOCX, EML, etc.)
    file_directory STRING, -- Directory where the file is located
    filename STRING, -- File name
    last_modified TIMESTAMP, -- Last modified time of the file
    languages STRING, -- Document language, supports a list of multiple languages
    page_number STRING, -- Page number (applicable for PDF, DOCX, etc.)
    text STRING, -- Extracted text content
    embeddings vector({embeddings_dimensions}), -- Vector data
    parent_id STRING, -- Parent element ID, used to represent element hierarchy
    is_continuation BOOLEAN, -- Whether it is a continuation of the previous element (used in chunking)
    orig_elements STRING, -- Original element in JSON format (used to store the complete element structure)
    element_type STRING, -- Element type (e.g., NarrativeText, Title, Table, etc.)
    coordinates STRING, -- Element coordinates (stored in JSONB format)
    link_texts STRING, -- Added field: Link text
    link_urls STRING, -- Added field: Link URL
    email_message_id STRING, -- Added field: Email message ID
    sent_from STRING, -- Added field: Sender
    sent_to STRING, -- Added field: Recipient
    subject STRING, -- Added field: Subject
    url STRING, -- Added field: URL
    version STRING, -- Added field: Version
    date_created TIMESTAMP, -- Added field: Creation date
    date_modified TIMESTAMP, -- Added field: Modification date
    date_processed TIMESTAMP, -- Added field: Processing date
    text_as_html STRING, -- Added field: Text in HTML format
    emphasized_text_contents STRING,
    emphasized_text_tags STRING,
    documents_source STRING, -- Added field: Document source
    INDEX {index_and_table_prefix}inverted_text_index_yunqi_cn (text) INVERTED  PROPERTIES('analyzer'='unicode'),
    INDEX {index_and_table_prefix}embeddings_vec_index_yunqi_cn(embeddings) USING vector properties (
        "scalar.type" = "f32",
        "distance.function" = "cosine_distance")
);
"""

# Statement that populates the silver table from the raw table with a single
# INSERT overwrite ... SELECT. Columns are copied one-to-one, with two exceptions:
#   - embeddings is explicitly cast to VECTOR(embeddings_dimensions)
#   - documents_source is filled with a fixed URL literal instead of being read
#     from the raw table's documents_original_source column
# NOTE(review): the URL literal duplicates the value passed to the uploader
# config in the pipeline cell; confirm whether selecting
# documents_original_source was intended instead.
clean_transformation_data_sql = f"""
INSERT overwrite {_schema}.{silver_table_name}
SELECT 
    id, 
    record_locator, 
    type, 
    record_id, 
    element_id, 
    filetype, 
    file_directory, 
    filename, 
    last_modified, 
    languages, 
    page_number, 
    text, 
    CAST(embeddings AS VECTOR({embeddings_dimensions})) AS embeddings, 
    parent_id, 
    is_continuation, 
    orig_elements, 
    element_type, 
    coordinates, 
    link_texts, 
    link_urls, 
    email_message_id, 
    sent_from, 
    sent_to, 
    subject, 
    url, 
    version, 
    date_created, 
    date_modified, 
    date_processed, 
    text_as_html,
    emphasized_text_contents, 
    emphasized_text_tags,
    "https://yunqi.tech/documents" as documents_source
FROM {_schema}.{raw_table_name};
"""

In [6]:
# Define the function to create the connection to Singdata Lakehouse.
from clickzetta.connector import connect
import pandas as pd
def get_connection(password, username, service, instance, workspace, schema, vcluster):
    """Open a connection to Singdata Lakehouse.

    Every argument is forwarded unchanged, by keyword, to the ``connect``
    function imported from ``clickzetta.connector``.

    Returns:
        Whatever ``connect`` returns (the connection object).
    """
    connect_kwargs = {
        "password": password,
        "username": username,
        "service": service,
        "instance": instance,
        "workspace": workspace,
        "schema": schema,
        "vcluster": vcluster,
    }
    return connect(**connect_kwargs)

In [7]:
# Open the shared Lakehouse connection used by the SQL helper cells below,
# using the parameters read from the environment.
conn = get_connection(
    password=_password,
    username=_username,
    service=_service,
    instance=_instance,
    workspace=_workspace,
    schema=_schema,
    vcluster=_vcluster,
)

In [8]:
def excute_sql(conn, sql_statement: str):
    """Run one SQL statement on ``conn`` and return all rows it produced.

    Args:
        conn: Connection object exposing ``cursor()``; the cursor is used as a
            context manager.
        sql_statement: The SQL text to execute.

    Returns:
        The value of ``cursor.fetchall()`` for the executed statement.
    """
    with conn.cursor() as cursor:
        cursor.execute(sql_statement)
        rows = cursor.fetchall()
    return rows

In [9]:
# Optionally start from an empty raw table. Only the raw table is dropped;
# the statement that would drop the silver table is left commented out.
if drop_tables:
    excute_sql(conn,f"DROP TABLE IF EXISTS {_schema}.{raw_table_name}")
    # excute_sql(conn,f"DROP TABLE IF EXISTS {_schema}.{silver_table_name}")

In [10]:
# Create the raw and silver tables in Singdata Lakehouse. Both statements use
# CREATE TABLE IF NOT EXISTS, so a table that already exists is left as it is.
excute_sql(conn, raw_table_ddl)
excute_sql(conn, silver_table_ddl)

[['OPERATION SUCCEED']]

### PDFs/Images/Emails ingestion and preprocessing pipeline

In [11]:
from unstructured_ingest.interfaces import ProcessorConfig
from unstructured_ingest.pipeline.pipeline import Pipeline
from unstructured_ingest.processes.chunker import ChunkerConfig
from unstructured_ingest.processes.connectors.fsspec.s3 import (
    S3ConnectionConfig,
    S3DownloaderConfig,
    S3IndexerConfig,
    S3AccessConfig,
)
from unstructured_ingest.processes.connectors.local import (
    LocalIndexerConfig,
    LocalDownloaderConfig,
    LocalConnectionConfig
)
from unstructured_ingest.processes.embedder import EmbedderConfig
from unstructured_ingest.processes.partitioner import PartitionerConfig

from unstructured_ingest.processes.connectors.sql.clickzetta import (
    ClickzettaConnectionConfig,
    ClickzettaAccessConfig,
    ClickzettaUploadStagerConfig,
    ClickzettaUploaderConfig
)

In [12]:
# !rm -rf /Users/liangmo/.cache/unstructured/ingest/pipeline/*

In [13]:
# Display the local input directory that the pipeline cell below reads from.
os.getenv("LOCAL_FILE_INPUT_DIR")

'/Users/liangmo/yunqidoc/cn_markdown_20250526'

In [14]:
# Ingestion pipeline: index local files -> partition -> chunk -> embed ->
# stage -> upload into the raw Lakehouse table.
pipeline = Pipeline.from_configs(
    # Runtime behaviour of the pipeline itself.
    context=ProcessorConfig(
        verbose=True,
        tqdm=True,
        num_processes=8,
    ),
    # Source: files under LOCAL_FILE_INPUT_DIR, matched recursively.
    indexer_config=LocalIndexerConfig(
        input_path=os.getenv("LOCAL_FILE_INPUT_DIR"),
        file_glob="**/*",
        recursive=True,
    ),
    downloader_config=LocalDownloaderConfig(),
    source_connection_config=LocalConnectionConfig(),
    # Partitioning runs locally (partition_by_api=False); the API key and
    # endpoint are still passed through from the environment.
    partitioner_config=PartitionerConfig(
        partition_by_api=False,
        api_key=os.getenv("UNSTRUCTURED_API_KEY"),
        partition_endpoint=os.getenv("UNSTRUCTURED_URL"),
        strategy="hi_res",
        additional_partition_args={
            "split_pdf_page": True,
            "split_pdf_allow_failed": True,
            "split_pdf_concurrency_level": 8,
        },
    ),
    # Chunk sizes come from the configuration cell.
    chunker_config=ChunkerConfig(
        chunking_strategy="by_title",
        chunk_max_characters=chunk_max_characters,
        chunk_overlap=chunk_overlap,
        chunk_combine_text_under_n_chars=200,
    ),
    # Embedding provider/model come from the configuration cell.
    embedder_config=EmbedderConfig(
        embedding_provider=embedding_provider,
        embedding_model_name=embedding_model_name,
    ),
    # Destination: the raw table in Singdata Lakehouse.
    destination_connection_config=ClickzettaConnectionConfig(
        access_config=ClickzettaAccessConfig(password=_password),
        username=_username,
        service=_service,
        instance=_instance,
        workspace=_workspace,
        schema=_schema,
        vcluster=_vcluster,
    ),
    stager_config=ClickzettaUploadStagerConfig(),
    uploader_config=ClickzettaUploaderConfig(
        table_name=raw_table_name,
        documents_original_source="https://yunqi.tech/documents",
    ),
)

pipeline.run()

2025-05-26 19:23:12,626 MainProcess INFO     created indexer with configs: {"input_path":"/Users/liangmo/yunqidoc/cn_markdown_20250526","recursive":true}, connection configs: {"access_config":"**********"}
2025-05-26 19:23:12,627 MainProcess INFO     Created download with configs: {"download_dir":null}, connection configs: {"access_config":"**********"}
2025-05-26 19:23:12,627 MainProcess INFO     created partition with configs: {"strategy":"hi_res","ocr_languages":null,"encoding":null,"additional_partition_args":{"split_pdf_page":true,"split_pdf_allow_failed":true,"split_pdf_concurrency_level":8},"skip_infer_table_types":null,"fields_include":["element_id","text","type","metadata","embeddings"],"flatten_metadata":false,"metadata_exclude":[],"element_exclude":[],"metadata_include":[],"partition_endpoint":"https://api.unstructuredapp.io/general/v0/general","partition_by_api":false,"api_timeout_ms":null,"api_key":"**********","hi_res_model_name":null,"raise_unsupported_filetype":false}
2

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 19:23:23,318 MainProcess INFO     running local pipeline: indexer (LocalIndexer) -> download (LocalDownloader) -> partition (hi_res) -> chunk (by_title) -> embed (huggingface) -> upload_stage (ClickzettaUploadStager) -> upload (ClickzettaUploader) with configs: {"reprocess":false,"verbose":true,"tqdm":true,"work_dir":"/Users/liangmo/.cache/unstructured/ingest/pipeline","num_processes":8,"max_connections":null,"raise_on_error":false,"disable_parallelism":false,"preserve_downloads":false,"download_only":false,"re_download":false,"uncompress":false,"iter_delete":false,"delete_cache":false,"otel_endpoint":null,"status":{}}
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
2025-05-26 19:23:23,399 MainProcess INFO     indexer finished in 8.4e-05s
2025-

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:23:46,372 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/424b07d8344c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:23:46,404 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b1f58fa580de.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:23:46,411 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1821b8bf1498.json not detected as batch file data
A value is trying t

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:23:48,168 SpawnPoolWorker-36 DEBUG    upload finished in 1.756919s, attributes: file_id=1821b8bf1498
2025-05-26 19:23:48,169 SpawnPoolWorker-36 DEBUG    upload finished in 1.759011s, attributes: file_id=1821b8bf1498
upload:   0%|          | 4/1056 [00:03<13:15,  1.32it/s]2025-05-26 19:23:48,172 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9611b8eaa985.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:23:48,886 SpawnPoolWorker-39 DEBUG    upload finished in 1.494749s, attributes: file_id=30ce2ed2e990
2025-05-26 19:23:48,887 SpawnPoolWorker-39 DEBUG    upload finished in 1.495839s, attributes: file_id=30ce2ed2e990
upload:   0%|          | 5/1056 [00:04<13:03,  1.34it/s]2025-05-26 19:23:48,889 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f9d1f85f2d19.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:23:49,009 SpawnPoolWorker-40 DEBUG    upload finished in 1.59462s, attributes: file_id=282224d85f28
2025-05-26 19:23:49,011 SpawnPoolWorker-40 DEBUG    upload finished in 1.597159s, attributes: file_id=282224d85f28
upload:   1%|         

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 19:23:49,101 SpawnPoolWorker-34 DEBUG    upload finished in 1.692571s, attributes: file_id=7436ad49ca43
2025-05-26 19:23:49,103 SpawnPoolWorker-34 DEBUG    upload finished in 1.694953s, attributes: file_id=7436ad49ca43
2025-05-26 19:23:49,109 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9496c96e5625.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:23:49,179 SpawnPoolWorker-35 DEBUG    upload finished in 2.807154s, attributes: file_id=424b07d8344c
2025-05-26 19:23:49,182 SpawnPoolWorker-35 DEBUG    upload finished in 2.812631s, attributes: file_id=424b07d8344c
upload:   1%|          | 8/1056 [00:04<06:09,  2.84it/s]2025-05-26 19:23:49,195 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 19:23:49,307 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/28afb76e7309.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:23:49,324 SpawnPoolWorker-41 DEBUG    upload finished in 2.898435s, attributes: file_id=c931a58e21e4
2025-05-26 19:23:49,325 SpawnPoolWorker-41 DEBUG    upload finished in 2.899598s, attributes: file_id=c931a58e21e4
2025-05-26 19:23:49,327 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/30ba797226c3.json not detected as batch file data
2025-05-26 19:23:49,341 SpawnPoolWorker-37 DEBUG    upload finished in 2.860514s, attributes: file_id=14b35af5b6f4
2025-05-26 19:23:49,344 SpawnPoolWorker-37 DEBUG    upload finished in 2.864666s, at

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:23:49,873 SpawnPoolWorker-36 DEBUG    upload finished in 1.701368s, attributes: file_id=9611b8eaa985
2025-05-26 19:23:49,873 SpawnPoolWorker-36 DEBUG    upload finished in 1.702073s, attributes: file_id=9611b8eaa985
upload:   1%|          | 12/1056 [00:05<04:12,  4.14it/s]2025-05-26 19:23:49,875 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/24d29fbac1bc.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:23:50,462 SpawnPoolWorker-40 DEBUG    upload finished in 1.44804s, attributes: file_id=741884a547ff
2025-05-26 19:23:50,462 SpawnPoolWorker-40 DEBUG    upload finished in 1.448658s, attributes: file_id=741884a547ff
upload:   1%|          | 13/1056 [00:05<05:22,  3.23it/s]2025-05-26 19:23:50,463 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8f6754bb56da.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:23:50,609 SpawnPoolWorker-41 DEBUG    upload finished in 1.281918s, attributes: file_id=30ba797226c3
2025-05-26 19:23:50,609 SpawnPoolWorker-41 DEBUG    upload finished in 1.28256s, attributes: file_id=30ba797226c3
upload:   1%|▏        

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 19:23:50,738 SpawnPoolWorker-39 DEBUG    upload finished in 1.849607s, attributes: file_id=f9d1f85f2d19
2025-05-26 19:23:50,739 SpawnPoolWorker-39 DEBUG    upload finished in 1.850193s, attributes: file_id=f9d1f85f2d19
upload:   2%|▏         | 16/1056 [00:06<03:21,  5.16it/s]2025-05-26 19:23:50,740 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/62e679a0b27e.json not detected as batch file data
2025-05-26 19:23:50,792 SpawnPoolWorker-37 DEBUG    upload finished in 1.446451s, attributes: file_id=d1aa326fbb91
2025-05-26 19:23:50,793 SpawnPoolWorker-37 DEBUG    upload finished in 1.447208s, attributes: file_id=d1aa326fbb91
2025-05-26 19:23:50,796 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fcb6a179ed57.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 19:23:51,037 SpawnPoolWorker-38 DEBUG    upload finished in 1.729605s, attributes: file_id=28afb76e7309
2025-05-26 19:23:51,037 SpawnPoolWorker-38 DEBUG    upload finished in 1.730677s, attributes: file_id=28afb76e7309
upload:   2%|▏         | 18/1056 [00:06<03:04,  5.62it/s]2025-05-26 19:23:51,049 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9618f566a8db.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:23:51,060 SpawnPoolWorker-35 DEBUG    upload finished in 1.86671s, attributes: file_id=2c3d5e6b3641
2025-05-26 19:23:51,063 SpawnPoolWorker-35 DEBUG    upload finished in 1.871946s, attributes: file_id=2c3d5e6b3641
2025-05-26 19:23:51,076 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 19:23:51,324 SpawnPoolWorker-36 DEBUG    upload finished in 1.446563s, attributes: file_id=24d29fbac1bc
2025-05-26 19:23:51,338 SpawnPoolWorker-36 DEBUG    upload finished in 1.463365s, attributes: file_id=24d29fbac1bc
upload:   2%|▏         | 20/1056 [00:06<02:54,  5.92it/s]2025-05-26 19:23:51,355 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2ffc2f8c056f.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:23:52,122 SpawnPoolWorker-39 DEBUG    upload finished in 1.38122s, attributes: file_id=62e679a0b27e
2025-05-26 19:23:52,126 SpawnPoolWorker-39 DEBUG    upload finished in 1.386326s, attributes: file_id=62e679a0b27e
2025-05-26 19:23:52,128 SpawnPoolWorker-41 DEBUG    upload finished in 1.516925s, attributes: file_id=0d739f0d02b6
upload:   2%|▏         | 21/1056 [00:07<04:59,  3.46it/s]2025-05-26 19:23:52,130 SpawnPoolWorker-41 DEBUG    upload finished in 1.520209s, attributes: file_id=0d739f0d02b6
2025-05-26 19:23:52,134 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6b4b59e1796f.json not detected as batch file data
2025-05-26 19:23:52,138 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8726c433e812.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
2025-05-26 19:23:52,528 SpawnPoolWorker-38 DEBUG    upload finished in 1.478204s, attributes: file_id=9618f566a8db
2025-05-26 19:23:52,536 SpawnPoolWorker-38 DEBUG    upload finished in 1.487031s, attributes: file_id=9618f566a8db
upload:   3%|▎         | 27/1056 [00:08<02:35,  6.63it/s]2025-05-26 19:23:52,544 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d76f1ee9a93a.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitesp

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:23:52,897 SpawnPoolWorker-36 DEBUG    upload finished in 1.543005s, attributes: file_id=2ffc2f8c056f
2025-05-26 19:23:52,898 SpawnPoolWorker-36 DEBUG    upload finished in 1.544838s, attributes: file_id=2ffc2f8c056f
2025-05-26 19:23:52,900 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6f8221c729f3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:23:53,640 SpawnPoolWorker-39 DEBUG    upload finished in 1.507711s, attributes: file_id=6b4b59e1796f
upload:   3%|▎         | 29/1056 [00:09<04:30,  3.80it/s]2025-05-26 19:23:53,641 SpawnPoolWorker-39 DEBUG    upload finished in 1.508804s, attributes: file_id=6b4b59e1796f
2025-05-26 19:23:53,644 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0986ea88e197.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:23:53,665 SpawnPoolWorker-40 DEBUG    upload finished in 1.4793s, attributes: file_id=1ef697619383
2025-05-26 19:23:53,665 SpawnPoolWorker-40 DEBUG    upload finished in 1.479925s, attributes: file_id=1ef697619383
2025-05-26 19:23:53,66

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:23:53,942 SpawnPoolWorker-38 DEBUG    upload finished in 1.400586s, attributes: file_id=d76f1ee9a93a
2025-05-26 19:23:53,943 SpawnPoolWorker-38 DEBUG    upload finished in 1.401269s, attributes: file_id=d76f1ee9a93a
upload:   3%|▎         | 35/1056 [00:09<02:26,  6.97it/s]Removed trailing semicolon and whitespace from query
2025-05-26 19:23:53,945 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e83057c7f917.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:23:54,037 SpawnPoolWorker-36 DEBUG    upload finished in 1.137872s, attribute

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:23:54,913 SpawnPoolWorker-37 DEBUG    upload finished in 1.210258s, attributes: file_id=d7174cb6c95e
2025-05-26 19:23:54,913 SpawnPoolWorker-37 DEBUG    upload finished in 1.210843s, attributes: file_id=d7174cb6c95e
upload:   4%|▎         | 37/1056 [00:10<03:55,  4.33it/s]2025-05-26 19:23:54,915 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6777617ff215.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:23:54,923 SpawnPoolWorker-39 DEBUG    upload finished in 1.279757s, attributes: file_id=0986ea88e197
2025-05-26 19:23:54,926 SpawnPoolWorker-39 DEBUG    upload finished in 1.282088s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:23:55,169 SpawnPoolWorker-35 DEBUG    upload finished in 1.418136s, attributes: file_id=7f2e56dd3c7f
2025-05-26 19:23:55,169 SpawnPoolWorker-35 DEBUG    upload finished in 1.419101s, attributes: file_id=7f2e56dd3c7f
upload:   4%|▍         | 43/1056 [00:10<02:01,  8.33it/s]2025-05-26 19:23:55,171 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a9743bffe8ca.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:23:55,296 SpawnPoolWorker-36 DEBUG    upload finished in 1.257044s, attributes: file_id=d6f74c45c704
2025-05-26 19:23:55,296 SpawnPoolWorker-36 DEBUG    upload finished in 1.257567s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:23:56,244 SpawnPoolWorker-40 DEBUG    upload finished in 1.221015s, attributes: file_id=3b77de03c19d
2025-05-26 19:23:56,245 SpawnPoolWorker-40 DEBUG    upload finished in 1.221615s, attributes: file_id=3b77de03c19d
upload:   4%|▍         | 45/1056 [00:11<03:41,  4.57it/s]2025-05-26 19:23:56,246 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c815b90e87b6.json not detected as batch file data
2025-05-26 19:23:56,268 SpawnPoolWorker-37 DEBUG    upload finished in 1.353568s, attributes: file_id=6777617ff215
2025-05-26 19:23:56,268 SpawnPoolWorker-37 DEBUG    upload finished in 1.354013s, attributes: file_id=6777617ff215
2025-05-26 19:23:56,270 SpawnPoolWorker-37 DEBUG    /User

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:23:56,451 SpawnPoolWorker-41 DEBUG    upload finished in 1.357277s, attributes: file_id=b04c0ba0d9c2
2025-05-26 19:23:56,451 SpawnPoolWorker-41 DEBUG    upload finished in 1.35785s, attributes: file_id=b04c0ba0d9c2
2025-05-26 19:23:56,473 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/53f7f44bad0e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:23:56,491 SpawnPoolWorker-38 DEBUG    upload finished in 1.389063s, attributes: file_id=d6dfee6e4a1e
2025-05-26 19:23:56,492 SpawnPoolWorker-38 DEBUG    upload finished in 1.389807s, attributes: file_id=d6dfee6e4a1e
2025-05-26 19:23:56,494 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2a5b44ffcb89.json no

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:23:57,478 SpawnPoolWorker-40 DEBUG    upload finished in 1.232604s, attributes: file_id=c815b90e87b6
2025-05-26 19:23:57,480 SpawnPoolWorker-40 DEBUG    upload finished in 1.233865s, attributes: file_id=c815b90e87b6
upload:   5%|▌         | 53/1056 [00:12<03:28,  4.81it/s]2025-05-26 19:23:57,484 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4c1a71d89eb3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None},

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:23:57,780 SpawnPoolWorker-35 DEBUG    upload finished in 1.382029s, attributes: file_id=97069fb7205f
2025-05-26 19:23:57,780 SpawnPoolWorker-35 DEBUG    upload finished in 1.382743s, attributes: file_id=97069fb7205f
2025-05-26 19:23:57,783 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b66015eb79b4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:23:57,797 SpawnPoolWorker-41 DEBUG    upload finished in 1.344567s, attributes: file_id=53f7f44bad0e
2025-05-26 19:23:57,798 SpawnPoolWorker-41 DEBUG    upload finished in 1.345211s, attributes: file_id=53f7f44bad0e
upload:   5%|▌         | 57/1056 [00:13<02:28,  6.73it/s]2025-05-26 19:23:57,800 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:23:58,738 SpawnPoolWorker-40 DEBUG    upload finished in 1.253745s, attributes: file_id=4c1a71d89eb3
2025-05-26 19:23:58,738 SpawnPoolWorker-40 DEBUG    upload finished in 1.25469s, attributes: file_id=4c1a71d89eb3
2025-05-26 19:23:58,741 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ec27297bcb6a.json not detected as batch file data
2025-05-26 19:23:58,903 SpawnPoolWorker-37 DEBUG    upload finished in 1.324023s, attributes: file_id=30173c8f1ce3
2025-05-26 19:23:58,904 SpawnPoolWorker-37 DEBUG    upload finished in 1.325135s, attributes: file_id=30173c8f1ce3
upload:   6%|▌         | 62/1056 [00:14<03:24,  4.87it/s]2025-05-26 19:23:58,905 SpawnPoolWorker-37 DEBUG    /Users

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:23:59,060 SpawnPoolWorker-41 DEBUG    upload finished in 1.260711s, attributes: file_id=fe5a54087049
2025-05-26 19:23:59,061 SpawnPoolWorker-41 DEBUG    upload finished in 1.261591s, attributes: file_id=fe5a54087049
2025-05-26 19:23:59,062 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e120653f79d4.json not detected as batch file data
2025-05-26 19:23:59,097 SpawnPoolWorker-39 DEBUG    upload finished in 1.419775s, attributes: file_id=7253843cb9ee
2025-05-26 19:23:59,097 SpawnPoolWorker-39 DEBUG    upload finished in 1.420307s, attributes: file_id=7253843cb9ee
upload:   6%|▌         | 64/1056 [00:14<02:54,  5.67it/s]2025-05-26 19:23:59,099 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/58d725616b0c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:23:59,316 SpawnPoolWorker-34 DEBUG    upload finished in 1.430687s, attributes: file_id=95e6845c6a4f
2025-05-26 19:23:59,316 SpawnPoolWorker-34 DEBUG    upload finished in 1.431242s, attributes: file_id=95e6845c6a4f
2025-05-26 19:23:59,317 SpawnPoolWorker-36 DEBUG    upload finished in 1.430053s, attributes: file_id=c806cf1621a5
2025-05-26 19:23:59,317 SpawnPoolWorker-36 DEBUG    upload finished in 1.43059s, attributes: file_id=c806cf1621a5
2025-05-26 19:23:59,318 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/aa28d1f2d64f.json not detected as batch file data
2025-05-26 19:23:59,318 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1272d4e99b29.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace(

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:00,037 SpawnPoolWorker-40 DEBUG    upload finished in 1.297581s, attributes: file_id=ec27297bcb6a
upload:   7%|▋         | 69/1056 [00:15<03:08,  5.23it/s]2025-05-26 19:24:00,038 SpawnPoolWorker-40 DEBUG    upload finished in 1.298213s, attributes: file_id=ec27297bcb6a
2025-05-26 19:24:00,039 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2034589fdf31.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:00,434 SpawnPoolWorker-41 DEBUG    upload finished in 1.371988s, attributes: file_id=e120653f79d4
2025-05-26 19:24:00,434 SpawnPoolWorker-41 DEBUG    upload finished in 1.372439s, attributes: file_id=e120653f79d4
upload:   7%|▋         | 70/1056 [00:15<03:39,  4.50it/s]2025-05-26 19:24:00,436 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/cd7f4fb1aec5.json not detected as batch file data
2025-05-26 19:24:00,499 SpawnPoolWorker-39 DEBUG    upload finished in 1.400843s, attributes: file_id=58d725616b0c
2025-05-26 19:24:00,499 SpawnPoolWorker-39 DEBUG    upload finished in 1.401356s, attributes: file_id=58d725616b0c
2025-05-26 19:24:00,501 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ec19a965c2c4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: htt

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:00,714 SpawnPoolWorker-34 DEBUG    upload finished in 1.396746s, attributes: file_id=aa28d1f2d64f
2025-05-26 19:24:00,715 SpawnPoolWorker-34 DEBUG    upload finished in 1.397522s, attributes: file_id=aa28d1f2d64f
upload:   7%|▋         | 75/1056 [00:16<02:08,  7.66it/s]2025-05-26 19:24:00,717 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4e8cf0f37e5f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:00,896 SpawnPoolWorker-36 DEBUG    upload finished in 1.578381s, attributes: file_id=1272d4e99b29
2025-05-26 19:24:00,897 SpawnPoolWorker-36 DEBUG    upload finished in 1.578888s, attributes: file_id=1272d4e99b29
2025-05-26 19:24:00,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:01,373 SpawnPoolWorker-40 DEBUG    upload finished in 1.334431s, attributes: file_id=2034589fdf31
2025-05-26 19:24:01,374 SpawnPoolWorker-40 DEBUG    upload finished in 1.335019s, attributes: file_id=2034589fdf31
upload:   7%|▋         | 77/1056 [00:16<03:02,  5.37it/s]2025-05-26 19:24:01,375 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d98912d2b7c8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None},

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:01,946 SpawnPoolWorker-41 DEBUG    upload finished in 1.510294s, attributes: file_id=cd7f4fb1aec5
2025-05-26 19:24:01,946 SpawnPoolWorker-41 DEBUG    upload finished in 1.511419s, attributes: file_id=cd7f4fb1aec5
upload:   7%|▋         | 78/1056 [00:17<04:05,  3.99it/s]2025-05-26 19:24:01,950 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/db584424487a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:02,096 SpawnPoolWorker-38 DEBUG    upload finished in 1.503899s, attributes: file_id=b2599b668ae7
2025-05-26 19:24:02,096 SpawnPoolWorker-38 DEBUG    upload finished in 1.504349s, attributes: file_id=b2599b668ae7
upload:   7%|▋      

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:02,174 SpawnPoolWorker-37 DEBUG    upload finished in 1.663293s, attributes: file_id=ad5b65f45a71
2025-05-26 19:24:02,175 SpawnPoolWorker-37 DEBUG    upload finished in 1.663928s, attributes: file_id=ad5b65f45a71
2025-05-26 19:24:02,176 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fe482e087a25.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:02,322 SpawnPoolWorker-36 DEBUG    upload finished in 1.424021s, attributes: file_id=1833f8f69368
2025-05-26 19:24:02,323 SpawnPoolWorker-36 DEBUG    upload finished in 1.425148s, attributes: file_id=1833f8f69368
upload:   8%|▊         | 81/1056 [00:17<03:04,  5.28it/s]2025-05-26 19:24:02,325 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:02,390 SpawnPoolWorker-39 DEBUG    upload finished in 1.888137s, attributes: file_id=ec19a965c2c4
2025-05-26 19:24:02,392 SpawnPoolWorker-39 DEBUG    upload finished in 1.892177s, attributes: file_id=ec19a965c2c4
2025-05-26 19:24:02,397 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8f8cde39f03d.json not detected as batch file data
2025-05-26 19:24:02,404 SpawnPoolWorker-35 DEBUG    upload finished in 1.824622s, attributes: file_id=ba04d4fe054d
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:02,404 SpawnPoolWorker-35 DEBUG    upload finished in 1.825354s, attributes: file_id=ba04d4fe054d
2025-05-26 19:24:02,406 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2bf0b60798b5.json n

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:03,110 SpawnPoolWorker-40 DEBUG    upload finished in 1.728151s, attributes: file_id=d98912d2b7c8
2025-05-26 19:24:03,126 SpawnPoolWorker-40 DEBUG    upload finished in 1.750985s, attributes: file_id=d98912d2b7c8
2025-05-26 19:24:03,134 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/98d7a2e3b026.json not detected as batch file data


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:03,612 SpawnPoolWorker-41 DEBUG    upload finished in 1.663151s, attributes: file_id=db584424487a
2025-05-26 19:24:03,613 SpawnPoolWorker-41 DEBUG    upload finished in 1.664251s, attributes: file_id=db584424487a
upload:   8%|▊         | 86/1056 [00:19<04:11,  3.86it/s]2025-05-26 19:24:03,615 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7608808cbeca.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:03,788 SpawnPoolWorker-38 DEBUG    upload finished in 1.690217s, attributes: file_id=e87d79311990
2025-05-26 19:24:03,789 SpawnPoolWorker-38 DEBUG    upload finished in 1.691159s, attributes: file_id=e87d79311990
upload:   8%|▊      

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:03,893 SpawnPoolWorker-35 DEBUG    upload finished in 1.487214s, attributes: file_id=2bf0b60798b5
2025-05-26 19:24:03,893 SpawnPoolWorker-35 DEBUG    upload finished in 1.487914s, attributes: file_id=2bf0b60798b5
upload:   8%|▊         | 89/1056 [00:19<02:55,  5.52it/s]2025-05-26 19:24:03,897 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/dd5d4c17bf57.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:03,903 SpawnPoolWorker-37 DEBUG    upload finished in 1.7272s, attributes: file_id=fe482e087a25
2025-05-26 19:24:03,904 SpawnPoolWorker-37 DEBUG    upload finished in 1.728204s, attributes: file_id=fe482e087a25
2025-05-26 19:24:03,908 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/u

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:04,535 SpawnPoolWorker-40 DEBUG    upload finished in 1.402924s, attributes: file_id=98d7a2e3b026
2025-05-26 19:24:04,535 SpawnPoolWorker-40 DEBUG    upload finished in 1.404124s, attributes: file_id=98d7a2e3b026
upload:   9%|▉         | 93/1056 [00:20<02:52,  5.57it/s]2025-05-26 19:24:04,537 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/604c425991c6.json not detected as batch file data


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:05,187 SpawnPoolWorker-38 DEBUG    upload finished in 1.394859s, attributes: file_id=59eb736e011d
2025-05-26 19:24:05,190 SpawnPoolWorker-38 DEBUG    upload finished in 1.399947s, attributes: file_id=59eb736e011d
2025-05-26 19:24:05,192 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fde93cca85bb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:05,303 SpawnPoolWorker-35 DEBUG    upload finished in 1.407451s, attributes: file_id=dd5d4c17bf57
2025-05-26 19:24:05,303 SpawnPoolWorker-35 DEBUG    upload finished in 1.408517s, attributes: file_id=dd5d4c17bf57
upload:   9%|▉         | 95/1056 [00:20<03:53,  4.12it/s]

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:05,305 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c1cfa7e66d73.json not detected as batch file data
2025-05-26 19:24:05,489 SpawnPoolWorker-37 DEBUG    upload finished in 1.579917s, attributes: file_id=8a3d2df77468
2025-05-26 19:24:05,491 SpawnPoolWorker-37 DEBUG    upload finished in 1.584437s, attributes: file_id=8a3d2df77468
upload:   9%|▉         | 96/1056 [00:20<03:44,  4.28it/s]2025-05-26 19:24:05,511 SpawnPoolWorker-41 DEBUG    upload finished in 1.894601s, attributes: file_id=7608808cbeca
2025-05-26 19:24:05,511 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d16c9bc85580.json not detected as batch file data
2025-05-26 19:24:05,535 SpawnPoolWorker-41 DEBUG    upload finished in 1.921049s, attributes: file_id=7608808cbeca
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:06,063 SpawnPoolWorker-40 DEBUG    upload finished in 1.526249s, attributes: file_id=604c425991c6
2025-05-26 19:24:06,063 SpawnPoolWorker-40 DEBUG    upload finished in 1.527063s, attributes: file_id=604c425991c6
2025-05-26 19:24:06,065 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/10bb83350399.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:06,603 SpawnPoolWorker-35 DEBUG    upload finished in 1.298314s, attributes: file_id=c1cfa7e66d73
2025-05-26 19:24:06,604 SpawnPoolWorker-35 DEBUG    upload finished in 1.29878s, attributes: file_id=c1cfa7e66d73
upload:  10%|▉         | 102/1056 [00:22<03:35,  4.42it/s]2025-05-26 19:24:06,605 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/308721054446.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:06,682 SpawnPoolWorker-38 DEBUG    upload finished in 1.490049s, attributes: file_id=fde93cca85bb
2025-05-26 19:24:06,682 SpawnPoolWorker-38 DEBUG    upload finished in 1.490563s, attributes: file_id=fde93cca85bb
2025-05-26 19:24:06,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:06,807 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/dd1aeb01e45a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:06,896 SpawnPoolWorker-39 DEBUG    upload finished in 1.349956s, attributes: file_id=850e9cfb7f0d
2025-05-26 19:24:06,896 SpawnPoolWorker-39 DEBUG    upload finished in 1.350634s, attributes: file_id=850e9cfb7f0d
upload:  10%|█         | 106/1056 [00:22<02:23,  6.62it/s]2025-05-26 19:24:06,898 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/745f799a9fd8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-doc

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:07,039 SpawnPoolWorker-37 DEBUG    upload finished in 1.533162s, attributes: file_id=d16c9bc85580
2025-05-26 19:24:07,039 SpawnPoolWorker-37 DEBUG    upload finished in 1.535315s, attributes: file_id=d16c9bc85580
2025-05-26 19:24:07,041 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/32044c2f973f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:07,059 SpawnPoolWorker-34 DEBUG    upload finished in 1.413685s, attributes: file_id=11d489a7c414
2025-05-26 19:24:07,059 SpawnPoolWorker-34 DEBUG    upload finished in 1.414293s, attributes: file_id=11d489a7c414
upload:  10%|█         | 108/1056 [00:22<02:04,  7.63it/s]2025-05-26 19:24:07

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:07,483 SpawnPoolWorker-40 DEBUG    upload finished in 1.418004s, attributes: file_id=10bb83350399
2025-05-26 19:24:07,483 SpawnPoolWorker-40 DEBUG    upload finished in 1.418638s, attributes: file_id=10bb83350399
2025-05-26 19:24:07,485 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e1c43f6baf6f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:07,994 SpawnPoolWorker-36 DEBUG    upload finished in 1.206317s, attributes: file_id=ea861701a34e
2025-05-26 19:24:07,994 SpawnPoolWorker-36 DEBUG    upload finished in 1.206773s, attributes: file_id=ea861701a34e
upload:  10%|█         | 110/1056 [00:23<03:36,  4.37it/s]2025-05-26 19:24:07,996 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2f6fd09ec93c.json not detected as batch file data
2025-05-26 19:24:08,039 SpawnPoolWorker-41 DEBUG    upload finished in 1.233224s, attributes: file_id=dd1aeb01e45a
2025-05-26 19:24:08,040 SpawnPoolWorker-41 DEBUG    upload finished in 1.233787s, attributes: file_id=dd1aeb01e45a
2025-05-26 19:24:08,041 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bc9c6776976a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: ht

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:08,197 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ce37a53a37be.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:08,337 SpawnPoolWorker-34 DEBUG    upload finished in 1.276884s, attributes: file_id=23c8706a3644
2025-05-26 19:24:08,338 SpawnPoolWorker-34 DEBUG    upload finished in 1.277326s, attributes: file_id=23c8706a3644
upload:  11%|█         | 115/1056 [00:23<02:15,  6.97it/s]2025-05-26 19:24:08,340 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c0ffea850d54.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in th

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:08,436 SpawnPoolWorker-37 DEBUG    upload finished in 1.395206s, attributes: file_id=32044c2f973f
2025-05-26 19:24:08,436 SpawnPoolWorker-37 DEBUG    upload finished in 1.395699s, attributes: file_id=32044c2f973f
2025-05-26 19:24:08,437 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9eb1d54550f3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:08,683 SpawnPoolWorker-40 DEBUG    upload finished in 1.199073s, attributes: file_id=e1c43f6baf6f
2025-05-26 19:24:08,684 SpawnPoolWorker-40 DEBUG    upload finished in 1.199552s, attributes: file_id=e1c43f6baf6f
upload:  11%|█         | 117/1056 [00:24<02:22,  6.60it/s]2025-05-26 19:24:08,685 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3272a56d2f99.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:09,202 SpawnPoolWorker-36 DEBUG    upload finished in 1.206618s, attributes: file_id=2f6fd09ec93c
2025-05-26 19:24:09,202 SpawnPoolWorker-36 DEBUG    upload finished in 1.207109s, attributes: file_id=2f6fd09ec93c
2025-05-26 19:24:09,204 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/76b683657772.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:09,459 SpawnPoolWorker-35 DEBUG    upload finished in 1.368512s, attributes: file_id=ed5ee75b38fe
2025-05-26 19:24:09,459 SpawnPoolWorker-35 DEBUG    upload finished in 1.368952s, attributes: file_id=ed5ee75b38fe
upload:  11%|█▏        | 119/1056 [00:24<03:24,  4.58it/s]2025-05-26 19:24:09,461 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/330818b5becb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:09,557 SpawnPoolWorker-39 DEBUG    upload finished in 1.361026s, attributes: file_id=ce37a53a37be
2025-05-26 19:24:09,557 SpawnPoolWorker-39 DEBUG    upload finished in 1.361474s, attributes: file_id=ce37a53a37be
2025-05-26 19:24:09,559 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
2025-05-26 19:24:09,786 SpawnPoolWorker-34 DEBUG    upload finished in 1.446938s, attributes: file_id=c0ffea850d54
2025-05-26 19:24:09,786 SpawnPoolWorker-34 DEBUG    upload finished in 1.447358s, attributes: file_id=c0ffea850d54
upload:  12%|█▏        | 123/1056 [00:25<02:21,  6.60it/s]2025-05-26 19:24:09,787 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/83b4cea47cdc.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:10,042 SpawnPoolWorker-37 DEBUG    upload finished in 1.605109s, attributes: file_id=9eb1d54550f3
2025-05-26 19:24:10,044 SpawnPoolWorker-37 DEBUG    upload finished in 1.606879s, attributes: file_id=9eb1d54550f3
2025-05-26 19:24:10,046 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/856c80c3c9a9.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:10,176 SpawnPoolWorker-40 DEBUG    upload finished in 1.491014s, attributes: f

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:10,924 SpawnPoolWorker-36 DEBUG    upload finished in 1.721225s, attributes: file_id=76b683657772
2025-05-26 19:24:10,925 SpawnPoolWorker-36 DEBUG    upload finished in 1.721746s, attributes: file_id=76b683657772
upload:  12%|█▏        | 126/1056 [00:26<04:07,  3.76it/s]2025-05-26 19:24:10,926 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fe4192da8028.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 19:24:11,136 SpawnPoolWorker-34 DEBUG    upload finished in 1.34921s, attributes: file_id=83b4cea47cdc
2025-05-26 19:24:11,136 SpawnPoolWorker-41 DEBUG    upload finished in 1.532779s, attributes: file_id=0711defdce28
2025-05-26 19:24:11,136 SpawnPoolWorker-34 DEBUG    upload finished in 1.349607s, attributes: file_id=83b4cea47cdc
2025-05-26 19:24:11,137 SpawnPoolWorker-41 DEBUG    upload finished in 1.533227s, attributes: file_id=0711defdce28
upload:  12%|█▏        | 127/1056 [00:26<03:56,  3.92it/s]2025-05-26 19:24:11,139 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/82eca9fb928d.json not detected as batch file data
2025-05-26 19:24:11,139 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/03aa67782f3f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_conn

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:11,438 SpawnPoolWorker-40 DEBUG    upload finished in 1.260986s, attributes: file_id=aa9263afe9c4
2025-05-26 19:24:11,438 SpawnPoolWorker-40 DEBUG    upload finished in 1.261404s, attributes: file_id=aa9263afe9c4
upload:  13%|█▎        | 133/1056 [00:26<01:48,  8.48it/s]2025-05-26 19:24:11,442 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/14e30baea67f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:12,162 SpawnPoolWorker-36 DEBUG    upload finished in 1.236209s, attributes: file_id=fe4192da8028
2025-05-26 19:24:12,162 SpawnPoolWorker-36 DEBUG    upload finished in 1.236661s, attributes: file_id=fe4192da8028
2025-05-26 19:24:12,165 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bd55d61f9e4f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:12,312 SpawnPoolWorker-41 DEBUG    upload finished in 1.174269s, attributes: file_id=82eca9fb928d
2025-05-26 19:24:12,313 SpawnPool

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_conn

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:12,432 SpawnPoolWorker-38 DEBUG    upload finished in 1.167711s, attributes: file_id=d6ae8be8815d
2025-05-26 19:24:12,433 SpawnPoolWorker-38 DEBUG    upload finished in 1.168106s, attributes: file_id=d6ae8be8815d
upload:  13%|█▎        | 138/1056 [00:27<02:12,  6.93it/s]2025-05-26 19:24:12,434 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/329931153dcb.json not detected as batch file data
2025-05-26 19:24:12,472 SpawnPoolWorker-35 DEBUG    upload finished in 1.137819s, attributes: file_id=6149dc3d4e99
2025-05-26 19:24:12,472 SpawnPoolWorker-35 DEBUG    upload finished in 1.138289s, attributes: file_id=6149dc3d4e99
2025-05-26 19:24:12,473 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:13,674 SpawnPoolWorker-36 DEBUG    upload finished in 1.510494s, attributes: file_id=bd55d61f9e4f
2025-05-26 19:24:13,675 SpawnPoolWorker-36 DEBUG    upload finished in 1.511347s, attributes: file_id=bd55d61f9e4f
upload:  13%|█▎        | 142/1056 [00:29<03:39,  4.16it/s]2025-05-26 19:24:13,679 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f3a329f50e66.json not detected as batch file data
2025-05-26 19:24:13,793 SpawnPoolWorker-39 DEBUG    upload finished in 1.442552s, attributes: file_id=7637130f1680
2025-05-26 19:24:13,793 SpawnPoolWorker-39 DEBUG    upload finished in 1.443177s, attributes: file_id=7637130f1680
202

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 19:24:13,884 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1467c6c4b19a.json not detected as batch file data
2025-05-26 19:24:13,939 SpawnPoolWorker-38 DEBUG    upload finished in 1.505064s, attributes: file_id=329931153dcb
2025-05-26 19:24:13,939 SpawnPoolWorker-38 DEBUG    upload finished in 1.505567s, attributes: file_id=329931153dcb
2025-05-26 19:24:13,942 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/adeb7945536c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:13,986 SpawnPoolWorker-41 DEBUG    upload finished in 1.672219s, attributes: file_id=a3907d8ce9b7
2025-05-26 19:24:13,986 SpawnPoolWorker-41 DEBUG    upload finished in 1.672767s, at

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:14,090 SpawnPoolWorker-40 DEBUG    upload finished in 1.464621s, attributes: file_id=5c3bf45d204a
2025-05-26 19:24:14,090 SpawnPoolWorker-40 DEBUG    upload finished in 1.465084s, attributes: file_id=5c3bf45d204a
upload:  14%|█▍        | 149/1056 [00:29<01:41,  8.92it/s]2025-05-26 19:24:14,092 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/929e2316c7a2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:15,118 SpawnPoolWorker-36 DEBUG    upload finished in 1.441009s, attributes: file_id=f3a329f50e66
2025-05-26 19:24:15,120 SpawnPoolWorker-36 DEBUG    upload finished in 1.44287s, attributes: file_id=f3a329f50e66
2025-05-26 19:24:15,123 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e7e389c392c9.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:15,230 SpawnPoolWorker-39 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 19:24:15,438 SpawnPoolWorker-38 DEBUG    upload finished in 1.497388s, attributes: file_id=adeb7945536c
2025-05-26 19:24:15,438 SpawnPoolWorker-38 DEBUG    upload finished in 1.498093s, attributes: file_id=adeb7945536c
2025-05-26 19:24:15,442 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3c969fd40c4c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:15,465 SpawnPoolWorker-40 DEBUG    upload finished in 1.374084s, attributes: file_id=929e2316c7a2
2025-05-26 19:24:15,465 SpawnPoolWorker-40 DEBUG    upload finished in 1.374564s, attributes: file_id=929e2316c7a2
upload:  14%|█▍        | 153/1056 [00:30<03:02,  4.96it/s]2025-05-26 19:24:15,467 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:16,434 SpawnPoolWorker-36 DEBUG    upload finished in 1.311843s, attributes: file_id=e7e389c392c9
2025-05-26 19:24:16,436 SpawnPoolWorker-36 DEBUG    upload finished in 1.31408s, attributes: file_id=e7e389c392c9
2025-05-26 19:24:16,439 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/119f4bbcd29b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:16,513 SpawnPoolWorker-41 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_conn

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:16,743 SpawnPoolWorker-34 DEBUG    upload finished in 1.206477s, attributes: file_id=fc6ab532f35a
2025-05-26 19:24:16,743 SpawnPoolWorker-34 DEBUG    upload finished in 1.207336s, attributes: file_id=fc6ab532f35a
2025-05-26 19:24:16,745 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9d45173a4cc3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:16,795 SpawnPoolWorker-37 DEBUG    upload

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:17,810 SpawnPoolWorker-36 DEBUG    upload finished in 1.371741s, attributes: file_id=119f4bbcd29b
2025-05-26 19:24:17,814 SpawnPoolWorker-36 DEBUG    upload finished in 1.375799s, attributes: file_id=119f4bbcd29b
upload:  16%|█▌        | 166/1056 [00:33<03:12,  4.62it/s]2025-05-26 19:24:17,817 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6c8af39ec793.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:17,853 SpawnPoolWork

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 19:24:18,091 SpawnPoolWorker-37 DEBUG    upload finished in 1.294158s, attributes: file_id=04841368ecad
2025-05-26 19:24:18,091 SpawnPoolWorker-39 DEBUG    upload finished in 1.456674s, attributes: file_id=e1c51cdd77cf
2025-05-26 19:24:18,091 SpawnPoolWorker-37 DEBUG    upload finished in 1.294775s, attributes: file_id=04841368ecad
2025-05-26 19:24:18,091 SpawnPoolWorker-39 DEBUG    upload finished in 1.457158s, attributes: file_id=e1c51cdd77cf
upload:  16%|█▌        | 170/1056 [00:33<02:11,  6.72it/s]2025-05-26 19:24:18,095 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0890673d2452.json not detected as batch file data
2025-05-26 19:24:18,095 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/576acc6525f4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:19,305 SpawnPoolWorker-41 DEBUG    upload finished in 1.450495s, attributes: file_id=fc020f401a58
2025-05-26 19:24:19,307 SpawnPoolWorker-41 DEBUG    upload finished in 1.452089s, attributes: file_id=fc020f401a58
upload:  16%|█▋        | 174/1056 [00:34<03:35,  4.09it/s]2025-05-26 19:24:19,311 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1f8237ed58cd.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 19:24:19,534 SpawnPoolWorker-39 DEBUG    upload finished in 1.440634s, attributes: file_id=0890673d2452
2025-05-26 19:24:19,535 SpawnPoolWorker-39 DEBUG    upload finished in 1.441611s, attributes: file_id=0890673d2452
2025-05-26 19:24:19,539 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/385595b0fe57.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:19,578 SpawnPoolWorker-35 DEBUG    upload finished in 1.36225s, attributes: file_id=ac9dd2c49c7f
2025-05-26 19:24:19,579 SpawnPoolWorker-35 DEBUG    upload finished in 1.362846s, attributes: file_id=ac9dd2c49c7f
upload:  17%|█▋        | 178/1056 [00:35<02:17,  6.37it/s]2025-05-26 19:24:19,581 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:20,629 SpawnPoolWorker-37 DEBUG    upload finished in 1.187567s, attributes: file_id=6c622251cd19
2025-05-26 19:24:20,630 SpawnPoolWorker-37 DEBUG    upload finished in 1.188957s, attributes: file_id=6c622251cd19
upload:  17%|█▋        | 182/1056 [00:36<02:59,  4.88it/s]2025-05-26 19:24:20,633 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/46399c8ea63f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 19:24:20,869 SpawnPoolWorker-41 DEBUG    upload finished in 1.560009s, attributes: file_id=1f8237ed58cd
2025-05-26 19:24:20,870 SpawnPoolWorker-41 DEBUG    upload finished in 1.560656s, attributes: file_id=1f8237ed58cd
upload:  18%|█▊        | 185/1056 [00:36<02:22,  6.10it/s]2025-05-26 19:24:20,871 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f51a53ff5b6b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:20,936 SpawnPoolWorker-35 DEBUG    upload finished in 1.355937s, attributes: file_id=ae753618b6ee
2025-05-26 19:24:20,936 SpawnPoolWorker-35 DEBUG    upload finished in 1.356425s, attributes: file_id=ae753618b6ee
2025-05-26 19:24:20,938 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:21,076 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f80e6ae7af95.json not detected as batch file data
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:21,142 SpawnPoolWorker-34 DEBUG    upload finished in 1.543871s, attributes: file_id=46cdd6a3de9d
2025-05-26 19:24:21,142 SpawnPoolWorker-34 DEBUG    upload finished in 1.544389s, attributes: file_id=46cdd6a3de9d
2025-05-26 19:24:21,144 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3de0d0385f2f.json not detected as batch file data
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:21,992 SpawnPoolWorker-36 DEBUG    upload finished in 1.269018s, attributes: file_id=3f45bf371b36
2025-05-26 19:24:21,992 SpawnPoolWorker-36 DEBUG    upload finished in 1.269633s, attributes: file_id=3f45bf371b36
upload:  18%|█▊        | 190/1056 [00:37<03:01,  4.78it/s]2025-05-26 19:24:21,995 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ea6f66e6f447.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:22,050 SpawnPoolWorker-37 DEBUG    upload finished in 1.418042s, attributes: file_id=46399c8ea63f
2025-05-26 19:24:22,051 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:22,240 SpawnPoolWorker-38 DEBUG    upload finished in 1.270457s, attributes: file_id=beb53689e803
2025-05-26 19:24:22,241 SpawnPoolWorker-38 DEBUG    upload finished in 1.271059s, attributes: file_id=beb53689e803
upload:  18%|█▊        | 195/1056 [00:37<01:53,  7.61it/s]2025-05-26 19:24:22,242 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d62cfdc7f56b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:22,269 SpawnPoolWorker-41 DEBUG    upload finished in 1.398739s, attributes: file_id=f51a53ff5b6b
2025-05-26 19:24:22,270 SpawnPoolWorker-41 DEBUG    upload finished in 1.399174s, attributes: file_id=f51a53ff5b6b
2025-05-26 19:24:22

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:23,273 SpawnPoolWorker-36 DEBUG    upload finished in 1.278976s, attributes: file_id=ea6f66e6f447
2025-05-26 19:24:23,274 SpawnPoolWorker-36 DEBUG    upload finished in 1.280331s, attributes: file_id=ea6f66e6f447
upload:  19%|█▉        | 198/1056 [00:38<02:54,  4.90it/s]2025-05-26 19:24:23,280 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6f46caf92c79.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:23,384 SpawnPoolWorker-37 DEBUG    upload finished in 1.332088s, attribut

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:23,486 SpawnPoolWorker-40 DEBUG    upload finished in 1.369663s, attributes: file_id=74ee6a3c66bc
2025-05-26 19:24:23,486 SpawnPoolWorker-40 DEBUG    upload finished in 1.370267s, attributes: file_id=74ee6a3c66bc
2025-05-26 19:24:23,489 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4a0f134d4f55.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:23,534 SpawnPoolWorker-39 DEBUG    upload finished in 1.464834s, attributes: file_id=c63a3523b2cd
2025-05-26 19:24:23,535 SpawnPoolWorker-39 DEBUG    upload finished in 1.465407s, attributes: file_id=c63a3523b2cd
2025-05-26 19:24:23,537 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1079fded4c9e.json n

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:24,553 SpawnPoolWorker-36 DEBUG    upload finished in 1.276374s, attributes: file_id=6f46caf92c79
2025-05-26 19:24:24,555 SpawnPoolWorker-36 DEBUG    upload finished in 1.278391s, attributes: file_id=6f46caf92c79
upload:  20%|█▉        | 206/1056 [00:40<02:44,  5.16it/s]2025-05-26 19:24:24,559 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/49129c2634b0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retur

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:24,763 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2cb11f86858b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:24,780 SpawnPoolWorker-40 DEBUG    upload finished in 1.291596s, attributes: file_id=4a0f134d4f55
2025-05-26 19:24:24,780 SpawnPoolWorker-40 DEBUG    upload finished in 1.292684s, attributes: file_id=4a0f134d4f55
upload:  20%|█▉        | 208/1056 [00:40<02:27,  5.73it/s]2025-05-26 19:24:24,784 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/99ca60e61acb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-doc

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:24,967 SpawnPoolWorker-39 DEBUG    upload finished in 1.431134s, attributes: file_id=1079fded4c9e
2025-05-26 19:24:24,968 SpawnPoolWorker-39 DEBUG    upload finished in 1.431622s, attributes: file_id=1079fded4c9e
2025-05-26 19:24:24,971 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a12414dddbc8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:25,901 SpawnPoolWorker-36 DEBUG    upload finished in 1.343873s, attributes: file_id=49129c2634b0
2025-05-26 19:24:25,902 SpawnPoolWorker-36 DEBUG    upload finished in 1.344864s, attributes: file_id=49129c2634b0
upload:  20%|██        | 214/1056 [00:41<02:45,  5.10it/s]2025-05-26 19:24:25,906 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/30a089f2f6db.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:25,995 SpawnPoolWorker-34 DEBUG    upload finished in 1.232947s, attributes: file_id=2cb11f86858b
2025-05-26 19:24:25,995 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:26,109 SpawnPoolWorker-41 DEBUG    upload finished in 1.201844s, attributes: file_id=e3aac8436350
2025-05-26 19:24:26,110 SpawnPoolWorker-41 DEBUG    upload finished in 1.202617s, attributes: file_id=e3aac8436350
2025-05-26 19:24:26,113 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f30023bed632.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:26,145 SpawnPoolWorker-35 DEBUG    upload finished in 1.254034s, attributes: file_id=443e4835dac3
2025-05-26 19:24:26,146 SpawnPoolWorker-35 DEBUG    upload finished in 1.254554s, attributes: file_id=443e4835dac3
2025-05-26 19:24:26,147 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ca01c2b4007f.json n

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:27,223 SpawnPoolWorker-35 DEBUG    upload finished in 1.076508s, attributes: file_id=ca01c2b4007f
2025-05-26 19:24:27,226 SpawnPoolWorker-35 DEBUG    upload finished in 1.078889s, attributes: file_id=ca01c2b4007f
upload:  21%|██        | 222/1056 [00:42<02:45,  5.04it/s]2025-05-26 19:24:27,229 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/298ff4a3fa4a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retur

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:27,479 SpawnPoolWorker-40 DEBUG    upload finished in 1.42835s, attributes: file_id=aae8afabdf4c
2025-05-26 19:24:27,480 SpawnPoolWorker-40 DEBUG    upload finished in 1.429166s, attributes: file_id=aae8afabdf4c
upload:  22%|██▏       | 228/1056 [00:42<01:39,  8.35it/s]2025-05-26 19:24:27,483 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9306cc371888.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:27,511 SpawnPoolWorker-39 DEBUG    upload finished in 1.244045s, attributes: file_id=02a070a63f4f
2025-05-26 19:24:27,512 SpawnPoolWorker-39 DEBUG    upload finished in 1.24455s, attributes: file_id=02a070a63f4f
2025-05-26 19:24:27,513 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:28,599 SpawnPoolWorker-34 DEBUG    upload finished in 1.329094s, attributes: file_id=23dacb5e1e16
2025-05-26 19:24:28,602 SpawnPoolWorker-34 DEBUG    upload finished in 1.334924s, attributes: file_id=23dacb5e1e16
upload:  22%|██▏       | 230/1056 [00:44<03:04,  4.48it/s]2025-05-26 19:24:28,606 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/cfd43ad838df.json not detected as batch file data
2025-05-26 19:24:28,676 SpawnPoolWorker-36 DEBUG    upload finished in 1.371287s, attributes: file_id=973e7dbc76d0
2025-05-26 19:24:28,677 SpawnPoolWorker-36 DEBUG    upload finished in 1.371926s, attributes: file_id=973e7dbc76d0
202

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:28,981 SpawnPoolWorker-40 DEBUG    upload finished in 1.498902s, attributes: file_id=9306cc371888
2025-05-26 19:24:28,981 SpawnPoolWorker-40 DEBUG    upload finished in 1.499734s, attributes: file_id=9306cc371888
upload:  22%|██▏       | 237/1056 [00:44<01:40,  8.11it/s]2025-05-26 19:24:28,984 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ce092251f7bd.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whites

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:29,862 SpawnPoolWorker-34 DEBUG    upload finished in 1.256911s, attributes: file_id=cfd43ad838df
2025-05-26 19:24:29,863 SpawnPoolWorker-34 DEBUG    upload finished in 1.258447s, attributes: file_id=cfd43ad838df
2025-05-26 19:24:29,865 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/822d138675c5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:30,012 SpawnPoolWorker-36 DEBUG    upload finished in 1.333813s, attributes: file_id=a7443365bd74
2025-05-26 19:24:30,013 SpawnPoolWorker-36 DEBUG    upload finished in 1.334606s, attributes: file_id=a7443365bd74
upload:  23%|██▎       |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:30,229 SpawnPoolWorker-41 DEBUG    upload finished in 1.44303s, attributes: file_id=15d065614483
2025-05-26 19:24:30,230 SpawnPoolWorker-41 DEBUG    upload finished in 1.443871s, attributes: file_id=15d065614483
upload:  23%|██▎       | 241/1056 [00:45<02:30,  5.41it/s]2025-05-26 19:24:30,232 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5bfbf55db27d.json not detected as batch file data
2025-05-26 19:24:30,236 SpawnPoolWorker-38 DEBUG    upload finished in 1.4816s, attributes: file_id=b67f9ade92d6
2025-05-26 19:24:30,236 SpawnPoolWorker-38 DEBUG    upload finished in 1.482171s, attributes: file_id=b67f9ade92d6
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:30,238 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/u

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:31,296 SpawnPoolWorker-34 DEBUG    upload finished in 1.430803s, attributes: file_id=822d138675c5
2025-05-26 19:24:31,298 SpawnPoolWorker-34 DEBUG    upload finished in 1.433191s, attributes: file_id=822d138675c5
upload:  23%|██▎       | 246/1056 [00:46<02:55,  4.60it/s]2025-05-26 19:24:31,300 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ba10f04291cf.json not detected as batch file data
2025-05-26 19:24:31,500 SpawnPoolWorker-36 DEBUG    upload finished in 1.485112s, attributes: file_id=f88eded3b396
2025-05-26 19:24:31,501 SpawnPoolWorker-36 DEBUG    upload finished in 1.486486s, attributes: file_id=f88eded3b396


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:31,505 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f0fa0ea23c36.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:31,704 SpawnPoolWorker-35 DEBUG    upload finished in 1.40677s, attributes: file_id=d542a4ad8286
2025-05-26 19:24:31,705 SpawnPoolWorker-35 DEBUG    upload finished in 1.407248s, attributes: file_id=d542a4ad8286
upload:  23%|██▎       | 248/1056 [00:47<02:52,  4.68it/s]

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:31,707 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d7189a7100da.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:31,767 SpawnPoolWorker-40 DEBUG    upload finished in 1.388837s, attributes: file_id=15c4eb7ad76e
2025-05-26 19:24:31,768 SpawnPoolWorker-40 DEBUG    upload finished in 1.390248s, attributes: file_id=15c4eb7ad76e
2025-05-26 19:24:31,771 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6bd3a61a9eef.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:31,914 SpawnPoolWorker-41 DEBUG    upload finished in 1.682419s, attributes: file_id=5bfbf55db27d
2025-05-26 19:24:31,914 SpawnPoolWorker-41 DEBUG    upload finished in 1.682931s, attributes: file_id=5bfbf55db27d
2025-05-26 19:24:31,916 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/51aa43ff4980.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:32,590 SpawnPoolWorker-34 DEBUG    upload finished in 1.290948s, attributes: file_id=ba10f04291cf
2025-05-26 19:24:32,591 SpawnPoolWorker-34 DEBUG    upload finished in 1.291868s, attributes: file_id=ba10f04291cf
upload:  24%|██▍       | 254/1056 [00:48<02:28,  5.38it/s]2025-05-26 19:24:32,594 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b29e6bad3f43.json not detected as batch file data


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:33,200 SpawnPoolWorker-40 DEBUG    upload finished in 1.429179s, attributes: file_id=6bd3a61a9eef
2025-05-26 19:24:33,201 SpawnPoolWorker-40 DEBUG    upload finished in 1.430337s, attributes: file_id=6bd3a61a9eef
upload:  24%|██▍       | 255/1056 [00:48<03:15,  4.09it/s]2025-05-26 19:24:33,204 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/45cfb3a1cf9f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:33,217 SpawnPoolWorker-35 DEBUG    upload finished in 1.510517s, attributes: file_id=d7189a7100da
2025-05-26 19:24:33,217 SpawnPoolWorker-35 DEBUG    upload finished in 1.511127s, attributes: file_id=d7189a7100da
2025-05-26 19:24:33

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:33,496 SpawnPoolWorker-37 DEBUG    upload finished in 1.639472s, attributes: file_id=086e76fff2bc
2025-05-26 19:24:33,496 SpawnPoolWorker-37 DEBUG    upload finished in 1.640387s, attributes: file_id=086e76fff2bc
upload:  25%|██▍       | 261/1056 [00:48<01:46,  7.49it/s]2025-05-26 19:24:33,499 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9d6c269413d2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:33,904 SpawnPoolWorker-34 DEBUG    upload finished in 1.310106s, attributes: file_id=b29e6bad3f43
2025-05-26 19:24:33,904 SpawnPoolWorker-34 DEBUG    upload finished in 1.310733s, attributes: file_id=b29e6bad3f43
2025-05-26 19:24:33,905 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/27612a378b78.json not detected as batch file data


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:34,568 SpawnPoolWorker-40 DEBUG    upload finished in 1.364966s, attributes: file_id=45cfb3a1cf9f
2025-05-26 19:24:34,569 SpawnPoolWorker-40 DEBUG    upload finished in 1.366871s, attributes: file_id=45cfb3a1cf9f
upload:  25%|██▍       | 263/1056 [00:50<03:07,  4.23it/s]2025-05-26 19:24:34,578 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/28dd9e363d25.json not detected as batch file data
2025-05-26 19:24:34,579 SpawnPoolWorker-35 DEBUG    upload finished in 1.360419s, attributes: file_id=35989320e8f1
2025-05-26 19:24:34,581 SpawnPoolWorker-35 DEBUG    upload finished in 1.363099s, attributes: file_id=35989320e8f1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:34

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:34,789 SpawnPoolWorker-36 DEBUG    upload finished in 1.426521s, attributes: file_id=23496335b411
2025-05-26 19:24:34,789 SpawnPoolWorker-36 DEBUG    upload finished in 1.427318s, attributes: file_id=23496335b411
upload:  25%|██▌       | 267/1056 [00:50<01:59,  6.61it/s]2025-05-26 19:24:34,792 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/711b8a6ff3d0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:34,865 SpawnPoolWorker-39 DEBUG    u

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:35,116 SpawnPoolWorker-34 DEBUG    upload finished in 1.211442s, attributes: file_id=27612a378b78
2025-05-26 19:24:35,118 SpawnPoolWorker-34 DEBUG    upload finished in 1.213475s, attributes: file_id=27612a378b78
2025-05-26 19:24:35,126 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/073546bddb5f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:35,810 SpawnPoolWorker-35 DEBUG    upload finished in 1.226167s, attributes: file_id=0457ba9a03fa
2025-05-26 19:24:35,811 SpawnPoolWorker-35 DEBUG    upload finished in 1.227482s, attributes: file_id=0457ba9a03fa
upload:  26%|██▌       | 271/1056 [00:51<02:51,  4.58it/s]2025-05-26 19:24:35,814 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/976e9c01e2f2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:36,044 SpawnPoolWorker-40 DEBUG    upload finished in 1.470595s, attributes: file_id=28dd9e363d25
2025-05-26 19:24:36,045 SpawnPoolWorker-40 DEBUG    upload finished in 1.471934s, attributes: file_id=28dd9e363d25
2025-05-26 19:24:36,048 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0cd01b114165.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:36,087 SpawnPoolWorker-36 DEBUG    upload finished in 1.29641s, attributes: file_id=711b8a6ff3d0
2025-05-26 19:24:36,088 SpawnPoolWorker-36 DEBUG    upload finished in 1.297036s, attributes: file_id=711b8a6ff3d0
upload:  26%|██▌       | 273/1056 [00:51<02:32,  5.13it/s]2025-05-26 19:24:36,090 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:36,289 SpawnPoolWorker-41 DEBUG    upload finished in 1.360189s, attributes: file_id=be1bf5793001
2025-05-26 19:24:36,289 SpawnPoolWorker-41 DEBUG    upload finished in 1.360921s, attributes: file_id=be1bf5793001
2025-05-26 19:24:36,291 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/191922a049c6.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:36,380 SpawnPoolWorker-34 DEBUG    upload finished in 1.254704s, attributes: file_id=073546bddb5f
2025-05-26 19:24:36,381 SpawnPoolWorker-34 DEBUG    upload finished in 1.255652s, attributes: file_id=073546bddb5f
upload:  26%|██▋       | 278/1056 [00:51<01:31,  8.47it/s]2025-05-26 19:24:36,390 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:37,206 SpawnPoolWorker-35 DEBUG    upload finished in 1.393401s, attributes: file_id=976e9c01e2f2
2025-05-26 19:24:37,207 SpawnPoolWorker-35 DEBUG    upload finished in 1.394132s, attributes: file_id=976e9c01e2f2
2025-05-26 19:24:37,210 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/185df24eebb3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inp

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:37,546 SpawnPoolWorker-37 DEBUG    upload finished in 1.439457s, attributes: file_id=589e49fa4187
2025-05-26 19:24:37,548 SpawnPoolWorker-37 DEBUG    upload finished in 1.441937s, attributes: file_id=589e49fa4187
upload:  27%|██▋       | 280/1056 [00:53<03:08,  4.11it/s]2025-05-26 19:24:37,551 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/14c112a4e0bd.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:37,564 SpawnPoolWorker-39 DEBUG    upload finished in 1.342323s, attributes: file_id=e18a80aa4ddc
2025-05-26 19:24:37,565 SpawnPoolWorker-39 DEBUG    upload finished in 1.343014s, attributes: file_id=e18a80aa4ddc
2025-05-26 19:24:37,569 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:38,607 SpawnPoolWorker-35 DEBUG    upload finished in 1.398396s, attributes: file_id=185df24eebb3
2025-05-26 19:24:38,608 SpawnPoolWorker-35 DEBUG    upload finished in 1.399489s, attributes: file_id=185df24eebb3
upload:  27%|██▋       | 287/1056 [00:54<02:30,  5.10it/s]2025-05-26 19:24:38,613 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c5396319a12c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retur

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:38,942 SpawnPoolWorker-34 DEBUG    upload finished in 1.222515s, attributes: file_id=b1d7e7b02f90
2025-05-26 19:24:38,942 SpawnPoolWorker-34 DEBUG    upload finished in 1.22318s, attributes: file_id=b1d7e7b02f90
2025-05-26 19:24:38,944 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d4e0901114a6.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:38,995 SpawnPoolWorker-37 DEBUG    upload finished in 1.445372s, attributes: file_id=14c112a4e0bd
2025-05-26 19:24:38,996 SpawnPoolWorker-37 DEBUG    upload finished in 1.44585s, attributes: file_id=14c112a4e0bd
upload:  27%|██▋       | 289/1056 [00:54<02:29,  5.11it/s]2025-05-26 19:24:38,999 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:39,218 SpawnPoolWorker-38 DEBUG    upload finished in 1.596775s, attributes: file_id=a693c0be9c40
2025-05-26 19:24:39,218 SpawnPoolWorker-38 DEBUG    upload finished in 1.597196s, attributes: file_id=a693c0be9c40
2025-05-26 19:24:39,220 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1d8cbbea3e11.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:39,236 SpawnPoolWorker-36 DEBUG    upload finished in 1.548522s, attributes: file_id=95817f4b20ed
2025-05-26 19:24:39,237 SpawnPoolWorker-36 DEBUG    upload finished in 1.549101s, attributes: file_id=95817f4b20ed
upload:  28%|██▊       | 294/1056 [00:54<01:35,  8.02it/s]2025-05-26 19:24:39,238 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:39,946 SpawnPoolWorker-35 DEBUG    upload finished in 1.33526s, attributes: file_id=c5396319a12c
2025-05-26 19:24:39,948 SpawnPoolWorker-35 DEBUG    upload finished in 1.337657s, attributes: file_id=c5396319a12c
2025-05-26 19:24:39,952 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/51af7ae581b0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:40,170 SpawnPoolWorker-41 DEBUG    upload finished in 1.139661s, attributes: file_id=68de3c3e3a7a
2025-05-26 19:24:40,170 SpawnPoolWorker-41 DEBUG    upload finished in 1.140474s, attributes: file_id=68de3c3e3a7a
upload:  28%|██▊       | 296/1056 [00:55<02:40,  4.73it/s]2025-05-26 19:24:40,173 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e54f8f3aef9e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:40,202 SpawnPoolWorker-34 DEBUG    upload finished in 1.258064s, attributes: file_id=d4e0901114a6
2025-05-26 19:24:40,202 SpawnPoolWorker-34 DEBUG    upload finished in 1.258515s, attributes: file_id=d4e0901114a6
2025-05-26 19:24:40,203 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:40,378 SpawnPoolWorker-36 DEBUG    upload finished in 1.139726s, attributes: file_id=75a1dc88b32d
2025-05-26 19:24:40,378 SpawnPoolWorker-36 DEBUG    upload finished in 1.140315s, attributes: file_id=75a1dc88b32d
2025-05-26 19:24:40,380 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c7df2df72a34.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:41,326 SpawnPoolWorker-34 DEBUG    upload finished in 1.122921s, attributes: file_id=af4e8027a422
2025-05-26 19:24:41,327 SpawnPoolWorker-34 DEBUG    upload finished in 1.124091s, attributes: file_id=af4e8027a422
upload:  29%|██▊       | 303/1056 [00:56<02:29,  5.02it/s]2025-05-26 19:24:41,331 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1bf080e6ede9.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:41,446 SpawnPoolWork

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:41,537 SpawnPoolWorker-39 DEBUG    upload finished in 1.327947s, attributes: file_id=23f9d562b57f
2025-05-26 19:24:41,538 SpawnPoolWorker-39 DEBUG    upload finished in 1.328363s, attributes: file_id=23f9d562b57f
2025-05-26 19:24:41,539 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fd669a56c585.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:41,593 SpawnPoolWorker-40 DEBUG    upload finished in 1.346838s, attributes: file_id=86c8a80f3dd6
2025-05-26 19:24:41,594 SpawnPoolWorker-40 DEBUG    upload finished in 1.34742s, attributes: file_id=86c8a80f3dd6
2025-05-26 19:24:41,596 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f40b6481a9cd.json no

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:41,771 SpawnPoolWorker-35 DEBUG    upload finished in 1.820301s, attributes: file_id=51af7ae581b0
2025-05-26 19:24:41,771 SpawnPoolWorker-35 DEBUG    upload finished in 1.820934s, attributes: file_id=51af7ae581b0
2025-05-26 19:24:41,773 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9cd4f3ffcc72.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:41,925 SpawnPoolWorker-36 DEBUG    upload finished in 1.545179s, attributes: file_id=c7df2df72a34
2025-05-26 19:24:41,925 SpawnPoolWorker-36 DEBUG    upload finished in 1.545675s, attr

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:42,822 SpawnPoolWorker-34 DEBUG    upload finished in 1.492769s, attributes: file_id=1bf080e6ede9
2025-05-26 19:24:42,823 SpawnPoolWorker-37 DEBUG    upload finished in 1.375259s, attributes: file_id=513729de0771
2025-05-26 19:24:42,823 SpawnPoolWorker-34 DEBUG    upload finished in 1.493969s, attributes: file_id=1bf080e6ede9
2025-05-26 19:24:42,824 SpawnPoolWorker-37 DEBUG    upload finished in 1.376317s, attributes: file_id=513729de0771
upload:  30%|██▉       | 312/1056 [00:58<02:38,  4.70it/s]2025-05-26 19:24:42,826 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e96a7fb4d747.json not detected as batch file data
2025-05-26 19:24:42,827 SpawnPoolWorker-37 DEBUG    /Use

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:43,159 SpawnPoolWorker-36 DEBUG    upload finished in 1.23263s, attributes: file_id=82cde931c08d
2025-05-26 19:24:43,159 SpawnPoolWorker-36 DEBUG    upload finished in 1.233141s, attributes: file_id=82cde931c08d
upload:  30%|███       | 317/1056 [00:58<01:44,  7.08it/s]Removed trailing semicolon and whitespace from query
2025-05-26 19:24:43,161 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/adae3e43c755.json not detected as batch file data
2025-05-26 19:24:43,163 SpawnPoolWorker-35 DEBUG    upload finished in 1.390693s, attributes: file_id=9cd4f3ffcc72
2025-05-26 19:24:43,163 SpawnPoolWorker-35 DEBUG    upload finished in 1.391142s, attributes: file_id=9cd4f3ffcc72
A value is trying to

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:43,983 SpawnPoolWorker-37 DEBUG    upload finished in 1.157237s, attributes: file_id=fe9b64c90aea
2025-05-26 19:24:43,985 SpawnPoolWorker-37 DEBUG    upload finished in 1.159521s, attributes: file_id=fe9b64c90aea
upload:  30%|███       | 319/1056 [00:59<02:37,  4.69it/s]2025-05-26 19:24:43,989 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9d1dbdfc5c8e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:44,015 SpawnPoolWorker-40 DEBUG    upload finished in 1.095262s, attributes: file_id=f8b7a807bf1d
2025-05-26 19:24:44,016 SpawnPoolWorker-40 DEBUG    upload finished in 1.096046s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:44,269 SpawnPoolWorker-35 DEBUG    upload finished in 1.104602s, attributes: file_id=6be3dac8956d
2025-05-26 19:24:44,269 SpawnPoolWorker-35 DEBUG    upload finished in 1.105013s, attributes: file_id=6be3dac8956d
upload:  31%|███       | 325/1056 [00:59<01:29,  8.18it/s]2025-05-26 19:24:44,271 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/243aac6fb32a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:44,334 SpawnPoolWorker-36 DEBUG    upload finished in 1.173371s, attributes: file_id=adae3e43c755
Removed trailing semicolon a

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:45,093 SpawnPoolWorker-38 DEBUG    upload finished in 1.05408s, attributes: file_id=c1942b9f9ed4
2025-05-26 19:24:45,094 SpawnPoolWorker-38 DEBUG    upload finished in 1.055416s, attributes: file_id=c1942b9f9ed4
upload:  31%|███       | 327/1056 [01:00<02:22,  5.12it/s]2025-05-26 19:24:45,097 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/540547e33aee.json not detected as batch file data
2025-05-26 19:24:45,117 SpawnPoolWorker-37 DEBUG    upload finished in 1.129217s, attributes: file_id=9d1dbdfc5c8e
2025-05-26 19:24:45,118 SpawnPoolWorker-37 DEBUG    upload finished in 1.129885s, attributes: file_id=9d1dbdfc5c8e
2025-05-26 19:24:45,119 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c9411fa9012e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:45,303 SpawnPoolWorker-34 DEBUG    upload finished in 1.202294s, attributes: file_id=a3de473e86ba
2025-05-26 19:24:45,303 SpawnPoolWorker-34 DEBUG    upload finished in 1.202812s, attributes: file_id=a3de473e86ba
2025-05-26 19:24:45,304 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/568096d4c1af.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:45,417 SpawnPoolWorker-35 DEBUG    upload finished in 1.147047s, attributes: file_id=243aac6fb32a
2025-05-26 19:24:45,418 SpawnPoolWorker-35 DEBUG    upload finished in 1.147586s, attributes: file_id=243aac6fb32a
upload:  32%|███▏      | 333/1056 [01:00<01:23,  8.66it/s]2025-05-26 19:24:45,420 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:45,521 SpawnPoolWorker-36 DEBUG    upload finished in 1.18595s, attributes: file_id=9dcdccd1d4c0
2025-05-26 19:24:45,522 SpawnPoolWorker-36 DEBUG    upload finished in 1.18641s, attributes: file_id=9dcdccd1d4c0
2025-05-26 19:24:45,523 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a02d98dd885a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:46,175 SpawnPoolWorker-38 DEBUG    upload finished in 1.079574s, attributes: file_id=540547e33aee
2025-05-26 19:24:46,176 SpawnPoolWorker-38 DEBUG    upload finished in 1.080148s, attributes: file_id=540547e33aee
upload:  32%|███▏      | 335/1056 [01:01<02:09,  5.59it/s]2025-05-26 19:24:46,178 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/21480e4d1631.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:46,458 SpawnPoolWorker-37 DEBUG    upload finished in 1.339596s, attributes: file_id=c9411fa9012e
2025-05-26 19:24:46,459 SpawnPoolWorker-37 DEBUG    upload finished in 1.3401s, attributes: file_id=c9411fa9012e
2025-05-26 19:24:46,461 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/841b4240d5d1.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:46,536 SpawnPoolWorker-35 DEBUG    upload finished in 1.116769s, attributes: file_id=a22424d1c645
2025-05-26 19:24:46,536 SpawnPoolWorker-35 DEBUG    upload finished in 1.11732s, attributes: file_id=a22424d1c645
upload:  32%|███▏      | 337/1056 [01:02<02:08,  5.58it/s]2025-05-26 19:24:46,538 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/70acf6e55eab.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:46,730 SpawnPoolWorker-34 DEBUG    upload finished in 1.425845s, attributes: file_id=568096d4c1af
upload:  32%|███▏      | 341/1056 [01:02<01:28,  8.06it/s]2025-05-26 19:24:46,730 SpawnPoolWorker-34 DEBUG    upload finished in 1.426252s, attributes: file_id=568096d4c1af
2025-05-26 19:24:46,733 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/de887096afec.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:46,786 SpawnPoolWorker-36 DEBUG    upload finished in 1.263108s, attributes: file_id=a02d98dd885a
2025-05-26 19:24:46,786 SpawnPoolWorker-36 DEBUG    upload finished in 1.263574s, attributes: file_id=a02d98dd885a
2025-05-26 19:24:46

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:47,351 SpawnPoolWorker-38 DEBUG    upload finished in 1.172877s, attributes: file_id=21480e4d1631
2025-05-26 19:24:47,351 SpawnPoolWorker-38 DEBUG    upload finished in 1.173786s, attributes: file_id=21480e4d1631
upload:  32%|███▏      | 343/1056 [01:02<01:59,  5.98it/s]2025-05-26 19:24:47,355 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d9ef1073131e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:47,668 SpawnPoolWorker-37 DEBUG    upload finished in 1.207742s, attributes: file_id=841b4240d5d1
2025-05-26 19:24:47,669 SpawnPoolWorker-37 DEBUG    upload finished in 1.208574s, attributes: file_id=841b4240d5d1
2025-05-26 19:24:47,672 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/12bb6e5fbec1.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:47,756 SpawnPoolWorker-40 DEBUG    upload finished in 1.165234s, attributes: file_id=54de75a0d4e1
2025-05-26 19:24:47,757 SpawnPoolWorker-40 DEBUG    upload finished in 1.165946s, attributes: file_id=54de75a0d4e1
upload:  33%|███▎      | 345/1056 [01:03<02:05,  5.68it/s]2025-05-26 19:24:47

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:47,880 SpawnPoolWorker-34 DEBUG    upload finished in 1.148737s, attributes: file_id=de887096afec
2025-05-26 19:24:47,881 SpawnPoolWorker-34 DEBUG    upload finished in 1.149351s, attributes: file_id=de887096afec
upload:  33%|███▎      | 349/1056 [01:03<01:21,  8.64it/s]2025-05-26 19:24:47,884 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f7c71393dd4b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:47,940 SpawnPoolWorker-36 DEBUG    upload finished in 1.152794s, attributes: file_id=22561ed37c5a
2025-05-26 19:24:47,941 SpawnPoolWorker-36 DEBUG    upload finished in 1.153976s, attributes: file_id=22561ed37c5a
2025-05-26 19:24:47,944 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:48,640 SpawnPoolWorker-38 DEBUG    upload finished in 1.286656s, attributes: file_id=d9ef1073131e
2025-05-26 19:24:48,641 SpawnPoolWorker-38 DEBUG    upload finished in 1.288413s, attributes: file_id=d9ef1073131e
upload:  33%|███▎      | 351/1056 [01:04<02:04,  5.67it/s]2025-05-26 19:24:48,644 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d250f1a274c1.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:48,877 SpawnPoolWorker-37 DEBUG    upload finished in 1.205913s, attributes: file_id=12bb6e5fbec1
2025-05-26 19:24:48,877 SpawnPoolWorker-37 DEBUG    upload finished in 1.206674s, attributes: file_id=12bb6e5fbec1
2025-05-26 19:24:48,879 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fd01b8f4f649.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:48,896 SpawnPoolWorker-39 DEBUG    upload finished in 1.119806s, attributes: file_id=9871cae03383
2025-05-26 19:24:48,896 SpawnPoolWorker-39 DEBUG    upload finished in 1.1204s, attributes: file_id=9871cae03383
upload:  33%|███▎      | 353/1056 [01:04<01:55,  6.09it/s]2025-05-26 19:24:48,898 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:49,088 SpawnPoolWorker-35 DEBUG    upload finished in 1.307657s, attributes: file_id=dfff349ef510
2025-05-26 19:24:49,088 SpawnPoolWorker-35 DEBUG    upload finished in 1.308306s, attributes: file_id=dfff349ef510
2025-05-26 19:24:49,090 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/51002af197f9.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:50,079 SpawnPoolWorker-34 DEBUG    upload finished in 1.086915s, attributes: file_id=940faae81b1a
2025-05-26 19:24:50,080 SpawnPoolWorker-34 DEBUG    upload finished in 1.087596s, attributes: file_id=940faae81b1a
2025-05-26 19:24:50,080 SpawnPoolWorker-38 DEBUG    upload finished in 1.436378s, attributes: file_id=d250f1a274c1
upload:  34%|███▍      | 359/1056 [01:05<02:17,  5.08it/s]2025-05-26 19:24:50,080 SpawnPoolWorker-38 DEBUG    upload finished in 1.437097s, attributes: file_id=d250f1a274c1
2025-05-26 19:24:50,084 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0a1aec75e0b1.json not detected as batch file data
2025-05-26 19:24:50,084 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0c12a9467428.json not detected as batch file data
A value is trying to be set on a copy of a slice from 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:50,333 SpawnPoolWorker-40 DEBUG    upload finished in 1.262725s, attributes: file_id=585d12e985f0
2025-05-26 19:24:50,333 SpawnPoolWorker-41 DEBUG    upload finished in 1.34087s, attributes: file_id=d7c3784ec29e
2025-05-26 19:24:50,333 SpawnPoolWorker-40 DEBUG    upload finished in 1.263264s, attributes: file_id=585d12e985f0
upload:  34%|███▍      | 361/1056 [01:05<02:05,  5.55it/s]2025-05-26 19:24:50,334 SpawnPoolWorker-41 DEBUG    upload finished in 1.341413s, attributes: file_id=d7c3784ec29e
2025-05-26 19:24:50,336 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b3606295e01b.json not detected as batch file data
2025-05-26 19:24:50,336 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c33bb55b246c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:50,648 SpawnPoolWorker-37 DEBUG    upload finished in 1.769815s, attributes: file_id=fd01b8f4f649
2025-05-26 19:24:50,649 SpawnPoolWorker-37 DEBUG    upload finished in 1.770485s, attributes: file_id=fd01b8f4f649
2025-05-26 19:24:50,651 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/247928580316.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:51,449 SpawnPoolWorker-38 DEBUG    upload finished in 1.366873s, attributes: file_id=0a1aec75e0b1
2025-05-26 19:24:51,450 SpawnPoolWorker-38 DEBUG    upload finished in 1.367796s, attributes: file_id=0a1aec75e0b1
upload:  35%|███▍      | 367/1056 [01:06<02:19,  4.93it/s]2025-05-26 19:24:51,454 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0b2ba82be3bb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:51,464 SpawnPoolWorker-34 DEBUG    upload finished in 1.381793s, attributes: file_id=0c12a9467428
2025-05-26 19:24:51,465 SpawnPoolWorker-34 DEBUG    upload finished in 1.382639s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:51,725 SpawnPoolWorker-37 DEBUG    upload finished in 1.074726s, attributes: file_id=247928580316
2025-05-26 19:24:51,726 SpawnPoolWorker-37 DEBUG    upload finished in 1.075385s, attributes: file_id=247928580316
upload:  35%|███▌      | 371/1056 [01:07<01:38,  6.99it/s]2025-05-26 19:24:51,727 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c56b7a8cde5e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:51,809 SpawnPoolWorker-35 DEBUG    upload finished in 1.30764s, attributes: file_id=16a91ecbd7ee
2025-05-26 19:24:51,809 SpawnPoolWorker-35 DEBUG    upload finished in 1.308126s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:52,571 SpawnPoolWorker-38 DEBUG    upload finished in 1.117928s, attributes: file_id=0b2ba82be3bb
2025-05-26 19:24:52,571 SpawnPoolWorker-38 DEBUG    upload finished in 1.118885s, attributes: file_id=0b2ba82be3bb
upload:  36%|███▌      | 375/1056 [01:08<02:07,  5.35it/s]2025-05-26 19:24:52,574 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/74ac23f25054.json not detected as batch file data
2025-05-26 19:24:52,612 SpawnPoolWorker-34 DEBUG    upload finished in 1.144318s, attributes: file_id=a4f93480a74d
2025-05-26 19:24:52,612 SpawnPoolWorker-34 DEBUG    upload finished in 1.145453s, attributes: file_id=a4f93480a74d
2025-05-26 19:24:52,616 SpawnPoolWorker-34 DEBUG    /Use

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:52,776 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/da337758756b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:52,862 SpawnPoolWorker-35 DEBUG    upload finished in 1.052451s, attributes: file_id=f8f0043106e3
2025-05-26 19:24:52,862 SpawnPoolWorker-39 DEBUG    upload finished in 1.217993s, attributes: file_id=dd43df9711d9
2025-05-26 19:24:52,863 SpawnPoolWorker-35 DEBUG    upload finished in 1.0531s, attributes: file_id=f8f0043106e3
2025-05-26 19:24:52,863 SpawnPoolWorker-39 DEBUG    upload finished in 1.218763s, attributes: file_id=dd43df9711d9
2025-05-26 19:24:52,865 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f2100ff8d77d.json not

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
2025-05-26 19:24:53,093 SpawnPoolWorker-40 DEBUG    upload finished in 1.216083s, attributes: file_id=2f3a8fa6ea6e
2025-05-26 19:24:53,093 SpawnPoolWorker-40 DEBUG    upload finished in 1.21653s, attributes: file_id=2f3a8fa6ea6e
2025-05-26 19:24:53,095 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5ff4c31d0a0e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:53,103 SpawnPoolWorker-41 DEBUG    upload finished in 1.225774s, attributes: file_id=3f39f96be5d8
2025-05-26 19:24:53,103 SpawnPoolWorker-41 DEBUG    upload finished in 1.226326s, attributes: file_id=3f39f96be5d8
upload:  36%|███▌      | 382/1056 [01:08<01:16,  8.83it/s]2025-05-26 19:24:53,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:53,683 SpawnPoolWorker-34 DEBUG    upload finished in 1.068232s, attributes: file_id=cfc3199af611
2025-05-26 19:24:53,684 SpawnPoolWorker-34 DEBUG    upload finished in 1.068836s, attributes: file_id=cfc3199af611
2025-05-26 19:24:53,686 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/40ea5c74f63f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:53,731 SpawnPoolWorker-38 DEBUG    upload finished in 1.156879s, attributes: file_id=74ac23f25054
2025-05-26 19:24:53,731 SpawnPoolWorker-38 DEBUG    upload finished in 1.157664s, attributes: file_id=74ac23f25054
upload:  36%|███▋      |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:53,998 SpawnPoolWorker-36 DEBUG    upload finished in 1.223776s, attributes: file_id=da337758756b
2025-05-26 19:24:53,999 SpawnPoolWorker-36 DEBUG    upload finished in 1.224813s, attributes: file_id=da337758756b
2025-05-26 19:24:54,001 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d5009f562be0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:54,009 SpawnPoolWorker-35 DEBUG    upload finished in 1.144447s, attributes: file_id=f2100ff8d77d
2025-05-26 19:24:54,009 SpawnPoolWorker-35 DEBUG    upload finished in 1.144902s, attributes: file_id=f2100ff8d77d
upload:  37%|███▋      | 386/1056 [01:09<01:47,  6.24it/s]2025-05-26 19:24:54,011 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:54,271 SpawnPoolWorker-40 DEBUG    upload finished in 1.176517s, attributes: file_id=5ff4c31d0a0e
2025-05-26 19:24:54,271 SpawnPoolWorker-40 DEBUG    upload finished in 1.176996s, attributes: file_id=5ff4c31d0a0e
upload:  37%|███▋      | 389/1056 [01:09<01:25,  7.84it/s]2025-05-26 19:24:54,274 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b3b5335ec3a0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:54,313 SpawnPoolWorker-41 DEBUG    upload finished in 1.209374s, attributes: file_id=b6f431559a13
2025-05-26 19:24:54,314 SpawnPoolWorker-41 DEBUG    upload finished in 1.209817s, attributes: file_id=b6f431559a13
2025-05-26 19:24:54,315 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:55,088 SpawnPoolWorker-38 DEBUG    upload finished in 1.356501s, attributes: file_id=f90e3b50768a
2025-05-26 19:24:55,089 SpawnPoolWorker-38 DEBUG    upload finished in 1.356968s, attributes: file_id=f90e3b50768a
upload:  37%|███▋      | 391/1056 [01:10<02:23,  4.62it/s]2025-05-26 19:24:55,091 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ee947cc095f3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:55,416 SpawnPoolWorker-39 DEBUG    upload finished in 1.303971s, attributes: file_id=dc566d48be6b
2025-05-26 19:24:55,417 SpawnPoolWorker-39 DEBUG    upload finished in 1.305181s, attributes: file_id=dc566d48be6b
upload:  37%|███▋      | 393/1056 [01:10<02:12,  4.99it/s]2025-05-26 19:24:55,420 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/78c4f3f5a842.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:55,535 SpawnPoolWorker-35 DEBUG    upload finished in 1.524642s, attributes: file_id=b9e12597af26
2025-05-26 19:24:55,536 SpawnPoolWorker-35 DEBUG    upload finished in 1.525913s, attributes: file_id=b9e12597af26
upload:  37%|███▋  

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:55,668 SpawnPoolWorker-36 DEBUG    upload finished in 1.668413s, attributes: file_id=d5009f562be0
2025-05-26 19:24:55,668 SpawnPoolWorker-36 DEBUG    upload finished in 1.6689s, attributes: file_id=d5009f562be0
upload:  38%|███▊      | 396/1056 [01:11<01:35,  6.91it/s]2025-05-26 19:24:55,672 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9af92fa22dd2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:55,763 SpawnPoolWorker-41 DEBUG    upload finished in 1.447764s, attributes: file_id=a8aa3f9571c9
2025-05-26 19:24:55,763 SpawnPoolWorker-41 DEBUG    upload finished in 1.448451s, attributes: file_id=a8aa3f9571c9
2025-05-26 19:24:55,765 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:56,483 SpawnPoolWorker-38 DEBUG    upload finished in 1.392606s, attributes: file_id=ee947cc095f3
2025-05-26 19:24:56,483 SpawnPoolWorker-38 DEBUG    upload finished in 1.393299s, attributes: file_id=ee947cc095f3
2025-05-26 19:24:56,487 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a31498b7c302.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:56,683 SpawnPoolWorker-34

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:56,691 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/af4acfc9b601.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:56,872 SpawnPoolWorker-39 DEBUG    upload finished in 1.453532s, attributes: file_id=78c4f3f5a842
2025-05-26 19:24:56,873 SpawnPoolWorker-39 DEBUG    upload finished in 1.454392s, attributes: file_id=78c4f3f5a842
upload:  38%|███▊      | 401/1056 [01:12<02:20,  4.67it/s]2025-05-26 19:24:56,877 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/33b86cde3337.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in th

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:56,901 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/896955dd5898.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:56,997 SpawnPoolWorker-35 DEBUG    upload finished in 1.458403s, attributes: file_id=9f058d20565d
2025-05-26 19:24:56,997 SpawnPoolWorker-35 DEBUG    upload finished in 1.458975s, attributes: file_id=9f058d20565d
upload:  38%|███▊      | 403/1056 [01:12<01:45,  6.20it/s]2025-05-26 19:24:56,999 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b68e3371d6cf.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-doc

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:57,144 SpawnPoolWorker-41 DEBUG    upload finished in 1.379834s, attributes: file_id=3f22ccb719d1
2025-05-26 19:24:57,145 SpawnPoolWorker-41 DEBUG    upload finished in 1.380472s, attributes: file_id=3f22ccb719d1
upload:  38%|███▊      | 405/1056 [01:12<01:26,  7.55it/s]2025-05-26 19:24:57,148 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c28ec5ee1049.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:57,246 SpawnPoolWorker-40 DEBUG    upload finished in 1.403841s, attributes: file_id=9fa8ddf4dd2c
2025-05-26 19:24:57,247 SpawnPoolWorker-40 DEBUG    upload finished in 1.404435s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:57,678 SpawnPoolWorker-38 DEBUG    upload finished in 1.193299s, attributes: file_id=a31498b7c302
2025-05-26 19:24:57,679 SpawnPoolWorker-38 DEBUG    upload finished in 1.193957s, attributes: file_id=a31498b7c302
upload:  39%|███▊      | 407/1056 [01:13<01:53,  5.69it/s]2025-05-26 19:24:57,681 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b5068984cd1d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:24:58,005 SpawnPoolWorker-34 DEBUG    upload finished in 1.314695s, attributes: file_id=af4acfc9b601
2025-05-26 19:24:58,006 SpawnPoolWorker-34 DEBUG    upload finished in 1.315607s, attributes: file_id=af4acfc9b601
upload:  39%|███▊      | 408/1056 [01:13<02:12,  4.91it/s]2025-05-26 19:24:58,009 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0140d6f61a26.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:58,062 SpawnPoolWorker-39 DEBUG    upload finished in 1.186354s, attributes: file_id=33b86cde3337
2025-05-26 19:24:58,062 SpawnPoolWorker-39 DEBUG    upload finished in 1.18696s, attributes: file_id=33b86cde3337
2025-05-26 19:24:58,063 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:58,287 SpawnPoolWorker-40 DEBUG    upload finished in 1.039193s, attributes: file_id=31885037b214
2025-05-26 19:24:58,288 SpawnPoolWorker-40 DEBUG    upload finished in 1.040115s, attributes: file_id=31885037b214
2025-05-26 19:24:58,290 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/64b11cafa23a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:58,345 SpawnPoolWorker-36 DEBUG    upload finished in 1.253384s, attributes: file_id=e14135d42

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:24:58,480 SpawnPoolWorker-35 DEBUG    upload finished in 1.481828s, attributes: file_id=b68e3371d6cf
2025-05-26 19:24:58,480 SpawnPoolWorker-35 DEBUG    upload finished in 1.482334s, attributes: file_id=b68e3371d6cf
upload:  39%|███▉      | 414/1056 [01:13<01:12,  8.81it/s]2025-05-26 19:24:58,482 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ed0bf2a43abd.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:58,895 SpawnPoolWorker-38 DEBUG    upload finished in 1.21497s, attributes: file_id=b5068984cd1d
2025-05-26 19:24:58,895 SpawnPoolWorker-38 DEBUG    upload finished in 1.215563s, attributes: file_id=b5068984cd1d
2025-05-26 19:24:58,897 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3a0af0f23200.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:24:59,207 SpawnPoolWorker-39 DEBUG    upload finished in 1.144056s, attributes: file_id=67770b6ff6ed
2025-05-26 19:24:59,207 SpawnPoolWorker-39 DEBUG    upload finished in 1.144514s, attributes: file_id=67770b6ff6ed
upload:  39%|███▉      | 416/1056 [01:14<02:03,  5.20it/s]2025-05-26 19:24:59,209 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/82e3929a782e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:59,338 SpawnPoolWorker-34 DEBUG    upload finished in 1.32974s, attributes: file_id=0140d6f61a26
2025-05-26 19:24:59,338 SpawnPoolWorker-34 DEBUG    upload finished in 1.330699s, attributes: file_id=0140d6f61a26
upload:  39%|███▉   

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:59,622 SpawnPoolWorker-36 DEBUG    upload finished in 1.2743s, attributes: file_id=f75e4c9270a7
2025-05-26 19:24:59,622 SpawnPoolWorker-36 DEBUG    upload finished in 1.27498s, attributes: file_id=f75e4c9270a7
upload:  40%|███▉      | 418/1056 [01:15<02:09,  4.94it/s]2025-05-26 19:24:59,624 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/561b12ac3167.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:24:59,644 SpawnPoolWorker-37 DEBUG    upload finished in 1.437965s, attributes: file_id=29905403403f
2025-05-26 19:24:59,645 SpawnPoolWorker-37 DEBUG    upload finished in 1.438498s, at

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:24:59,994 SpawnPoolWorker-38 DEBUG    upload finished in 1.097296s, attributes: file_id=3a0af0f23200
2025-05-26 19:24:59,994 SpawnPoolWorker-38 DEBUG    upload finished in 1.097834s, attributes: file_id=3a0af0f23200
upload:  40%|████      | 423/1056 [01:15<01:16,  8.31it/s]2025-05-26 19:24:59,996 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/53e104d85fb7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:00,417 SpawnPoolWorker-39 DEBUG    upload finished in 1.208119s, attributes: file_id=82e3929a782e
2025-05-26 19:25:00,417 SpawnPoolWorker-39 DEBUG    upload finished in 1.208653s, attributes: file_id=82e3929a782e
2025-05-26 19:25:00,419 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c5e61c3c1731.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:00,509 SpawnPoolWorker-34 DEBUG    upload finished in 1.168876s, attributes: file_id=20be6af929d7
2025-05-26 19:25:00,510 SpawnPoolWorker-34 DEBUG    upload finished in 1.169579s, attributes: file_id=20be6af929d7
upload:  40%|████      | 425/1056 [01:16<01:41,  6.20it/s]

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:00,511 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e5657d5719ee.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:00,768 SpawnPoolWorker-41 DEBUG    upload finished in 1.024567s, attributes: file_id=62762d4ef6ab
2025-05-26 19:25:00,769 SpawnPoolWorker-41 DEBUG    upload finished in 1.025324s, attributes: file_id=62762d4ef6ab
upload:  40%|████      | 426/1056 [01:16<01:52,  5.61it/s]2025-05-26 19:25:00,772 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/11ded53052fe.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:00,835 SpawnPoolWorker-36 DEBUG    upload finished in 1.211113s, attrib

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:25:01,015 SpawnPoolWorker-35 DEBUG    upload finished in 1.271129s, attributes: file_id=5a7671087de8
2025-05-26 19:25:01,015 SpawnPoolWorker-35 DEBUG    upload finished in 1.271836s, attributes: file_id=5a7671087de8
2025-05-26 19:25:01,016 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2e3ddd48e711.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:01,216 SpawnPoolWorker-38 DEBUG    upload finished in 1.221195s, attributes: file_id=53e104d85fb7
2025-05-26 19:25:01,217 SpawnPoolWorker-38 DEBUG    upload finished in 1.22163s, attri

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:01,219 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6983dd59d635.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:01,665 SpawnPoolWorker-39 DEBUG    upload finished in 1.24633s, attributes: file_id=c5e61c3c1731
2025-05-26 19:25:01,665 SpawnPoolWorker-39 DEBUG    upload finished in 1.246918s, attributes: file_id=c5e61c3c1731
upload:  41%|████      | 432/1056 [01:17<01:54,  5.45it/s]2025-05-26 19:25:01,668 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4556aa37dc0f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:01,686 SpawnPoolWorker-34 DEBUG    upload finished in 1.175008s, attributes: file_id=e5657d5719ee
2025-05-26 19:25:01,686 SpawnPoolWorker-34 DEBUG    upload finished in 1.175519s, attributes: file_id=e5657d5719ee
2025-05-26 19:25:01,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:01,992 SpawnPoolWorker-41 DEBUG    upload finished in 1.220847s, attributes: file_id=11ded53052fe
2025-05-26 19:25:01,993 SpawnPoolWorker-41 DEBUG    upload finished in 1.221951s, attributes: file_id=11ded53052fe
upload:  41%|████      | 434/1056 [01:17<01:50,  5.65it/s]2025-05-26 19:25:01,997 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6f0d651d1cf0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:02,103 SpawnPoolWorker-37 DEBUG    upload finished in 1.242151s, attributes: file_id=3dc7bb186dea
2025-05-26 19:25:02,103 SpawnPoolWorker-37 DEBUG    upload finished in 1.242608s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:02,311 SpawnPoolWorker-38 DEBUG    upload finished in 1.092462s, attributes: file_id=6983dd59d635
2025-05-26 19:25:02,311 SpawnPoolWorker-38 DEBUG    upload finished in 1.09322s, attributes: file_id=6983dd59d635
upload:  41%|████▏     | 437/1056 [01:17<01:28,  6.99it/s]2025-05-26 19:25:02,314 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bbafe4f4ae02.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:02,373 SpawnPoolWorker-40 DEBUG    upload finished in 1.403959s, attributes: file_id=587351a0e2fa
2025-05-26 19:25:02,373 SpawnPoolWorker-40 DEBUG    upload finished in 1.404485s, attributes: file_id=587351a0e2fa
2025-05-26 19:25:02,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:02,801 SpawnPoolWorker-34 DEBUG    upload finished in 1.113547s, attributes: file_id=8665ecc712b6
2025-05-26 19:25:02,801 SpawnPoolWorker-34 DEBUG    upload finished in 1.114032s, attributes: file_id=8665ecc712b6
upload:  42%|████▏     | 440/1056 [01:18<01:33,  6.59it/s]2025-05-26 19:25:02,804 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bd7e97e03ca6.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:02,898 SpawnPoolWorker-39 DEBUG    upload finished in 1.231334s, attribut

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:03,177 SpawnPoolWorker-41 DEBUG    upload finished in 1.181864s, attributes: file_id=6f0d651d1cf0
2025-05-26 19:25:03,178 SpawnPoolWorker-41 DEBUG    upload finished in 1.18328s, attributes: file_id=6f0d651d1cf0
upload:  42%|████▏     | 442/1056 [01:18<01:39,  6.15it/s]2025-05-26 19:25:03,183 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ad7e0152a61c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:03,238 SpawnPoolWorker-37 DEBUG    upload finished in 1.133248s, attributes: file_id=bdf7be16cb79
2025-05-26 19:25:03,238 SpawnPoolWorker-37 DEBUG    upload finished in 1.133704s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:03,460 SpawnPoolWorker-36 DEBUG    upload finished in 1.283554s, attributes: file_id=be2e96cf060b
2025-05-26 19:25:03,461 SpawnPoolWorker-36 DEBUG    upload finished in 1.284093s, attributes: file_id=be2e96cf060b
upload:  42%|████▏     | 444/1056 [01:18<01:35,  6.40it/s]2025-05-26 19:25:03,464 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/30a6a0bb97b1.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:03,597 SpawnPoolWorker-38 DEBUG    upload finished in 1.284569s, attributes: file_id=bbafe4f4ae02
2025-05-26 19:25:03,597 SpawnPoolWorker-38 DEBUG    upload finished in 1.285115s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:03,677 SpawnPoolWorker-40 DEBUG    upload finished in 1.302508s, attributes: file_id=25aa9170c232
2025-05-26 19:25:03,678 SpawnPoolWorker-40 DEBUG    upload finished in 1.303131s, attributes: file_id=25aa9170c232
2025-05-26 19:25:03,679 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c1954ce55ee1.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:04,272 SpawnPoolWorker-39 DEBUG    upload finished in 1.372241s, attributes: file_id=b6accb386d56
2025-05-26 19:25:04,272 SpawnPoolWorker-39 DEBUG    upload finished in 1.372837s, attributes: file_id=b6accb386d56
upload:  42%|████▏     | 448/1056 [01:19<01:51,  5.45it/s]2025-05-26 19:25:04,275 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/678b09a5c55b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:04,324 SpawnPoolWorker-34 DEBUG    upload finished in 1.521581s, attributes: file_id=bd7e97e03ca6
2025-05-26 19:25:04,324 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:04,558 SpawnPoolWorker-41 DEBUG    upload finished in 1.376691s, attributes: file_id=ad7e0152a61c
2025-05-26 19:25:04,559 SpawnPoolWorker-41 DEBUG    upload finished in 1.377411s, attributes: file_id=ad7e0152a61c
upload:  43%|████▎     | 450/1056 [01:20<01:44,  5.82it/s]2025-05-26 19:25:04,561 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fa9d92225197.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:04,575 SpawnPoolWorker-37 DEBUG    upload finished in 1.335108s, attributes: file_id=819a4e0cf13e
2025-05-26 19:25:04,575 SpawnPoolWorker-37 DEBUG    upload finished in 1.335725s, attributes: file_id=819a4e0cf13e
2025-05-26 19:25:04,576 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:04,832 SpawnPoolWorker-36 DEBUG    upload finished in 1.369718s, attributes: file_id=30a6a0bb97b1
2025-05-26 19:25:04,832 SpawnPoolWorker-36 DEBUG    upload finished in 1.37028s, attributes: file_id=30a6a0bb97b1
upload:  43%|████▎     | 452/1056 [01:20<01:37,  6.19it/s]2025-05-26 19:25:04,834 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/50958add8224.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:04,978 SpawnPoolWorker-35 DEBUG    upload finished in 1.344412s, attributes: file_id=1e84f9591f2f
2025-05-26 19:25:04,979 SpawnPoolWorker-35 DEBUG    upload finished in 1.345026s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:05,088 SpawnPoolWorker-38 DEBUG    upload finished in 1.488514s, attributes: file_id=76a3f97529df
2025-05-26 19:25:05,088 SpawnPoolWorker-38 DEBUG    upload finished in 1.489024s, attributes: file_id=76a3f97529df
upload:  43%|████▎     | 454/1056 [01:20<01:29,  6.71it/s]2025-05-26 19:25:05,089 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6613713a74fb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:05,223 SpawnPoolWorker-40 DEBUG    upload finished in 1.544466s, attributes: file_id=c1954ce55ee1
2025-05-26 19:25:05,224 SpawnPoolWorker-40 DEBUG    upload finished in 1.545067s, attributes: file_id=c1954ce55ee1
upload:  43%|████▎     | 455/1056 [01:20<01:27,  6.84it/s]2025-05-26 19:

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:05,688 SpawnPoolWorker-34 DEBUG    upload finished in 1.3617s, attributes: file_id=c167e36596f6
2025-05-26 19:25:05,688 SpawnPoolWorker-34 DEBUG    upload finished in 1.3623s, attributes: file_id=c167e36596f6
upload:  43%|████▎     | 456/1056 [01:21<02:13,  4.50it/s]2025-05-26 19:25:05,690 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d27bb6f11092.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:05,740 SpawnPoolWorker-39 DEBUG    upload finished in 1.466601s, attributes: file_id=678b09a5c55b
2025-05-26 19:25:05,740 SpawnPoo

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:06,011 SpawnPoolWorker-37 DEBUG    upload finished in 1.435067s, attributes: file_id=0e65fd8286be
2025-05-26 19:25:06,011 SpawnPoolWorker-37 DEBUG    upload finished in 1.435501s, attributes: file_id=0e65fd8286be
upload:  43%|████▎     | 458/1056 [01:21<01:58,  5.06it/s]2025-05-26 19:25:06,013 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/169d489eb12f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:06,098 SpawnPoolWorker-41 DEBUG    upload finished in 1.5377s, attributes: file_id=fa9d92225197
2025-05-26 19:25:06,098 SpawnPoolWorker-41 DEBUG    upload finished in 1.538137s, attributes: file_id=fa9d92225197
2025-05-26 19:25:06,1

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:06,355 SpawnPoolWorker-35 DEBUG    upload finished in 1.374873s, attributes: file_id=f97476feafbb
2025-05-26 19:25:06,356 SpawnPoolWorker-35 DEBUG    upload finished in 1.376075s, attributes: file_id=f97476feafbb
upload:  44%|████▎     | 460/1056 [01:21<01:52,  5.31it/s]2025-05-26 19:25:06,360 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/53b37310692d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:06,397 SpawnPoolWorker-38 DEBUG    upload finished in 1.307738s, attributes: file_id=6613713a74fb
2025-05-26 19:25:06,397 SpawnPoolWorker-38 DEBUG    upload finished in 1.308316s, attributes: file_id=6613713a74fb
2025-05-26 19:25:06

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:07,024 SpawnPoolWorker-39 DEBUG    upload finished in 1.282489s, attributes: file_id=d192b57dc17f
2025-05-26 19:25:07,025 SpawnPoolWorker-39 DEBUG    upload finished in 1.283205s, attributes: file_id=d192b57dc17f
2025-05-26 19:25:07,027 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/435c26f607ec.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:07,044 SpawnPoolWorker-34 DEBUG    upload finished in 1.354158s, attributes: file_id=d27bb6f11092
2025-05-26 19:25:07,044 SpawnPool

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:07,234 SpawnPoolWorker-37 DEBUG    upload finished in 1.220829s, attributes: file_id=169d489eb12f
2025-05-26 19:25:07,234 SpawnPoolWorker-37 DEBUG    upload finished in 1.22173s, attributes: file_id=169d489eb12f
upload:  44%|████▍     | 466/1056 [01:22<01:42,  5.76it/s]2025-05-26 19:25:07,237 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7bf67b71c872.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:07,416 SpawnPoolWorker-41 DEBUG    upload finished in 1.316691s, attributes: file_id=04091b97a6bd
2025-05-26 19:25:07,417 SpawnPoolWorker-41 DEBUG    upload finished in 1.317751s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:07,615 SpawnPoolWorker-38 DEBUG    upload finished in 1.215959s, attributes: file_id=f50cd1806d9d
2025-05-26 19:25:07,615 SpawnPoolWorker-38 DEBUG    upload finished in 1.217031s, attributes: file_id=f50cd1806d9d
upload:  44%|████▍     | 468/1056 [01:23<01:46,  5.54it/s]2025-05-26 19:25:07,620 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fdd0c10e251d.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:07,770 SpawnPoolWorker-40 DEBUG    upload finished in 1.279638s, attributes: file_id=02b3cd1e6c45
2025-05-26 19:25:07,771 SpawnPoolWorker-40 DEBUG    upload finished in 1.280377s, attributes: file_id=02b3cd1e6c45
upload:  44%|████▍ 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:07,840 SpawnPoolWorker-36 DEBUG    upload finished in 1.43993s, attributes: file_id=802f4c9b2c89
2025-05-26 19:25:07,840 SpawnPoolWorker-36 DEBUG    upload finished in 1.440539s, attributes: file_id=802f4c9b2c89
2025-05-26 19:25:07,843 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/06b34690a708.json not detected as batch file data
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:08,186 SpawnPoolWorker-39 DEBUG    upload finished in 1.16s, attributes: file_id=435c26f607ec
2025-05-26 19:25:08,186 SpawnPoolWorker-39 DEBUG    upload finished in 1.160445s, attributes: file_id=435c26f607ec
upload:  45%|████▍     | 472/1056 [01:23<01:31,  6.40it/s]2025-05-26 19:25:08,188 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/28176407b9a5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:08,205 SpawnPoolWorker-34 DEBUG    upload finished in 1.160175s, attributes: file_id=247cbd93b06b
2025-05-26 19:25:08,206 SpawnPoo

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:08,566 SpawnPoolWorker-37 DEBUG    upload finished in 1.329513s, attributes: file_id=7bf67b71c872
2025-05-26 19:25:08,566 SpawnPoolWorker-37 DEBUG    upload finished in 1.330113s, attributes: file_id=7bf67b71c872
upload:  45%|████▍     | 474/1056 [01:24<01:37,  5.98it/s]2025-05-26 19:25:08,567 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a2f455c9b97c.json not detected as batch file data
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:08,889 SpawnPoolWorker-38 DEBUG    upload finished in 1.271387s, attributes: file_id=fdd0c10e251d
2025-05-26 19:25:08,890 SpawnPoolWorker-38 DEBUG    upload finished in 1.272193s, attributes: file_id=fdd0c10e251d
upload:  45%|████▍     | 475/1056 [01:24<01:54,  5.08it/s]2025-05-26 19:25:08,892 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/82e45cba2052.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:08,924 SpawnPoolWorker-36 DEBUG    upload finished in 1.083101s, attributes: file_id=06b34690a708
2025-05-26 19:25:08,925 SpawnPoolWorker-36 DEBUG    upload finished in 1.083649s, attributes: file_id=06b34690a708
2025-05-26 19:25:08

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:25:09,101 SpawnPoolWorker-35 DEBUG    upload finished in 1.313801s, attributes: file_id=89878c05287a
2025-05-26 19:25:09,101 SpawnPoolWorker-35 DEBUG    upload finished in 1.314459s, attributes: file_id=89878c05287a
2025-05-26 19:25:09,104 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7efb9284a55f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:09,430 SpawnPoolWorker-34 DEBUG    upload finished in 1.22265s, attributes: file_id=88b4ab4b6781
2025-05-26 19:25:09,430 SpawnPoolWorker-34 DEBUG    upload finished in 1.223263s, attributes: file_id=88b4ab4b6781
upload:  45%|████▌     | 480/1056 [01:24<01:26,  6.66it/s]2025-05-26 19:25:09,433 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/16b4d9a43f2d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:09,662 SpawnPoolWorker-39 DEBUG    upload finished in 1.474824s, attributes: file_id=28176407b9a5
2025-05-26 19:25:09,663 SpawnPoolWorker-39 DEBUG    upload finished in 1.475654s, attributes: file_id=28176407b9a5
upload:  46%|████▌     | 481/1056 [01:25<01:34,  6.07it/s]2025-05-26 19:25:09,667 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/68a2183fdca2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:09,921 SpawnPoolWorker-37 DEBUG    upload finished in 1.353523s, attributes: file_id=a2f455c9b97c
2025-05-26 19:25:09,921 SpawnPoolWorker-37 DEBUG    upload finished in 1.354039s, attributes: file_id=a2f455c9b97c
upload:  46%|████▌     | 482/1056 [01:25<01:45,  5.45it/s]2025-05-26 19:25:09,924 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8e1aa07cc5ea.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:10,246 SpawnPoolWorker-36 DEBUG    upload finished in 1.320061s, attributes: file_id=07c09ce2f45e
2025-05-26 19:25:10,246 SpawnPoolWorker-36 DEBUG    upload finished in 1.320631s, attributes: file_id=07c09ce2f45e
upload:  46%|████▌     | 483/1056 [01:25<02:03,  4.65it/s]2025-05-26 19:25:10,248 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/966e8054c9f8.json not detected as batch file data
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:10,340 SpawnPoolWorker-38 DEBUG    upload finished in 1.448342s, attributes: file_id=82e45cba2052
2025-05-26 19:25:10,340 SpawnPoolWorker-38 DEBUG    upload finished in 1.449021s, attributes: file_id=82e45cba2052
2025-05-26 19:25:10,343 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d6ced8e00da7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: ht

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:10,466 SpawnPoolWorker-40 DEBUG    upload finished in 1.42397s, attributes: file_id=45169ae7c35c
2025-05-26 19:25:10,467 SpawnPoolWorker-40 DEBUG    upload finished in 1.424804s, attributes: file_id=45169ae7c35c
upload:  46%|████▌     | 485/1056 [01:25<01:39,  5.74it/s]2025-05-26 19:25:10,470 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c6a716652907.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:10,669 SpawnPoolWorker-41 DEBUG    upload finished in 1.679818s, attributes: file_id=d80bc2151d77
2025-05-26 19:25:10,670 SpawnPoolWorker-41 DEBUG    upload finished in 1.68079s, attributes: file_id=d80bc2151d77
upload:  46%|████▌     | 486/1056 [01:26<01:42,  5.54it/s]

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:10,675 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4e4cd4944d7a.json not detected as batch file data
2025-05-26 19:25:10,703 SpawnPoolWorker-35 DEBUG    upload finished in 1.599614s, attributes: file_id=7efb9284a55f
2025-05-26 19:25:10,703 SpawnPoolWorker-35 DEBUG    upload finished in 1.600128s, attributes: file_id=7efb9284a55f
2025-05-26 19:25:10,705 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7bc78e9b2de8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:10,916 SpawnPoolWorker-34 DEBUG    upload finished in 1.484389s, attributes: file_id=16b4d9a43f2d
2025-05-26 19:25:10,916 SpawnPoolWorker-34 DEBUG    upload finished in 1.484911s, attributes: file_id=16b4d9a43f2d
upload:  46%|████▌     | 488/1056 [01:26<01:30,  6.31it/s]2025-05-26 19:25:10,918 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d180aeb6f489.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:11,101 SpawnPoolWorker-39 DEBUG    upload finished in 1.436077s, attributes: file_id=68a2183fdca2
2025-05-26 19:25:11,102 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:11,214 SpawnPoolWorker-37 DEBUG    upload finished in 1.291687s, attributes: file_id=8e1aa07cc5ea
2025-05-26 19:25:11,214 SpawnPoolWorker-37 DEBUG    upload finished in 1.292249s, attributes: file_id=8e1aa07cc5ea
upload:  46%|████▋     | 490/1056 [01:26<01:26,  6.57it/s]2025-05-26 19:25:11,217 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ebea2885521a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:11,634 SpawnPoolWorker-36 DEBUG    upload finished in 1.386202s, attributes: file_id=966e8054c9f8
2025-05-26 19:25:11,634 SpawnPoolWorker-36 DEBUG    upload finished in 1.38664s, attributes: file_id=966e8054c9f8
upload:  46%|████▋     | 491/1056 [01:27<02:03,  4.56it/s]2025-05-26 19:25:11,636 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f78bdb9e0a94.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:11,740 SpawnPoolWorker-40 DEBUG    upload finished in 1.270841s, attributes: file_id=c6a716652907
2025-05-26 19:25:11,740 SpawnPoolWorker-40 DEBUG    upload finished in 1.271523s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
2025-05-26 19:25:11,896 SpawnPoolWorker-35 DEBUG    upload finished in 1.192255s, attributes: file_id=7bc78e9b2de8
2025-05-26 19:25:11,897 SpawnPoolWorker-35 DEBUG    upload finished in 1.19337s, attributes: file_id=7bc78e9b2de8
upload:  47%|████▋     | 495/1056 [01:27<01:05,  8.56it/s]2025-05-26 19:25:11,901 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/64f704dbf808.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:12,107 SpawnPoolWorker-34 DEBUG    upload finished in 1.189372s, attributes: file_id=d180aeb6f489
2025-05-26 19:25:12,107 SpawnPoolWorker-34 DEBUG    upload finished in 1.189873s, attributes: file_id=d180aeb6f489
2025-05-26 19:25:12,109 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/42e003079f80.json not detected as batch file data
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:12,441 SpawnPoolWorker-39 DEBUG    upload finished in 1.337713s, attributes: file_id=712803ca2d70
2025-05-26 19:25:12,441 SpawnPoolWorker-39 DEBUG    upload finished in 1.338124s, attributes: file_id=712803ca2d70
upload:  47%|████▋     | 497/1056 [01:27<01:34,  5.93it/s]2025-05-26 19:25:12,443 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/15ec535ced3b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:12,512 SpawnPoolWorker-37 DEBUG    upload finished in 1.296651s, attributes: file_id=ebea2885521a
2025-05-26 19:25:12,512 SpawnPoolWorker-37 DEBUG    upload finished in 1.29713s, attributes: file_id=ebea2885521a
2025-05-26 19:25:12,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:12,904 SpawnPoolWorker-36 DEBUG    upload finished in 1.268712s, attributes: file_id=f78bdb9e0a94
2025-05-26 19:25:12,904 SpawnPoolWorker-36 DEBUG    upload finished in 1.269312s, attributes: file_id=f78bdb9e0a94
upload:  47%|████▋     | 499/1056 [01:28<01:45,  5.29it/s]2025-05-26 19:25:12,906 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/903ae06fdb23.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:13,014 SpawnPoolWorker-40 DEBUG    upload finished in 1.271743s, attributes: file_id=a6b6253aa91b
2025-05-26 19:25:13,014 SpawnPoolWorker-40 DEBUG    upload finished in 1.272448s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:13,136 SpawnPoolWorker-38 DEBUG    upload finished in 1.365205s, attributes: file_id=993bf7caaae2
2025-05-26 19:25:13,136 SpawnPoolWorker-38 DEBUG    upload finished in 1.365766s, attributes: file_id=993bf7caaae2
upload:  48%|████▊     | 502/1056 [01:28<01:14,  7.42it/s]2025-05-26 19:25:13,138 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3a5b71c4e31a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:13,175 SpawnPoolWorker-41 DEBUG    upload finished in 1.389925s, attributes: file_id=561f1fe1b3b2
2025-05-26 19:25:13,175 SpawnPoolWorker-41 DEBUG    upload finished in 1.390758s, attributes: file_id=561f1fe1b3b2
2025-05-26 19:25:13,177 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:13,605 SpawnPoolWorker-39 DEBUG    upload finished in 1.162601s, attributes: file_id=15ec535ced3b
2025-05-26 19:25:13,605 SpawnPoolWorker-39 DEBUG    upload finished in 1.163179s, attributes: file_id=15ec535ced3b
upload:  48%|████▊     | 505/1056 [01:29<01:19,  6.95it/s]2025-05-26 19:25:13,607 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/614717f4fdb1.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:13,707 SpawnPoolWorker-37 DEBUG    upload finished in 1.193565s, attribut

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:14,069 SpawnPoolWorker-36 DEBUG    upload finished in 1.163534s, attributes: file_id=903ae06fdb23
2025-05-26 19:25:14,070 SpawnPoolWorker-36 DEBUG    upload finished in 1.16407s, attributes: file_id=903ae06fdb23
upload:  48%|████▊     | 507/1056 [01:29<01:39,  5.53it/s]2025-05-26 19:25:14,072 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b78d23ff9a15.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:14,265 SpawnPoolWorker-40 DEBUG    upload finished in 1.24898s, attributes: file_id=bdf1dc1e7fa7
2025-05-26 19:25:14,267 SpawnPoolWorker-40 DEBUG    upload finished in 1.251626s, a

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:14,304 SpawnPoolWorker-38 DEBUG    upload finished in 1.166118s, attributes: file_id=3a5b71c4e31a
2025-05-26 19:25:14,304 SpawnPoolWorker-38 DEBUG    upload finished in 1.166747s, attributes: file_id=3a5b71c4e31a
2025-05-26 19:25:14,306 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bda38eaa1fac.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:14,377 SpawnPoolWorker-34 DEBUG    upload finished in 1.182642s, attributes: file_id=3e45508b3

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:14,873 SpawnPoolWorker-37 DEBUG    upload finished in 1.164684s, attributes: file_id=c05f6e12264c
2025-05-26 19:25:14,874 SpawnPoolWorker-37 DEBUG    upload finished in 1.165215s, attributes: file_id=c05f6e12264c
upload:  49%|████▊     | 513/1056 [01:30<01:20,  6.73it/s]2025-05-26 19:25:14,876 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e879eb958ba7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:14,915 SpawnPoolWork

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:15,423 SpawnPoolWorker-36 DEBUG    upload finished in 1.351933s, attributes: file_id=b78d23ff9a15
2025-05-26 19:25:15,424 SpawnPoolWorker-36 DEBUG    upload finished in 1.352522s, attributes: file_id=b78d23ff9a15
upload:  49%|████▉     | 515/1056 [01:30<01:40,  5.36it/s]2025-05-26 19:25:15,425 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/546f6d626bdd.json not detected as batch file data
2025-05-26 19:25:15,545 SpawnPoolWorker-38 DEBUG    upload finished in 1.238864s, attributes: file_id=bda38eaa1fac
2025-05-26 19:25:15,545 SpawnPoolWorker-38 DEBUG    upload finished in 1.239441s, attributes: file_id=bda38eaa1fac
upload:  49%|████▉     | 516/1056 [01:31<01:34,  5.71it/s]2025-05-26 19:25:15,547 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bdb714053945.json not detected as batch file d

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:15,626 SpawnPoolWorker-34 DEBUG    upload finished in 1.246924s, attributes: file_id=bde8c7c94187
2025-05-26 19:25:15,626 SpawnPoolWorker-34 DEBUG    upload finished in 1.247504s, attributes: file_id=bde8c7c94187
2025-05-26 19:25:15,628 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3d5399568b61.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:15,814 SpawnPoolWorker-41 DEBUG    upload finished in 1.389915s, attributes: file_id=1541bfdcfc0b
2025-05-26 19:25:15,814 SpawnPoolWorker-41 DEBUG    upload finished in 1.390744s, attributes: file_id=1541bfdcfc0b
upload:  49%|████▉     | 519/1056 [01:31<01:14,  7.23it/s]2025-05-26 19:25:15

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:15,882 SpawnPoolWorker-35 DEBUG    upload finished in 1.409636s, attributes: file_id=99911038e538
2025-05-26 19:25:15,882 SpawnPoolWorker-35 DEBUG    upload finished in 1.410133s, attributes: file_id=99911038e538
2025-05-26 19:25:15,884 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1f3b582d3590.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:16,243 SpawnPoolWorker-37 DEBUG    upload finished in 1.367997s, attributes: file_id=e879eb958ba7
2025-05-26 19:25:16,243 SpawnPoolWorker-37 DEBUG    upload finished in 1.368456s, attributes: file_id=e879eb958ba7
upload:  49%|████▉     | 521/1056 [01:31<01:25,  6.23it/s]2025-05-26 19:25:16,245 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c32e22df4772.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:16,385 SpawnPoolWork

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:16,657 SpawnPoolWorker-36 DEBUG    upload finished in 1.232336s, attributes: file_id=546f6d626bdd
2025-05-26 19:25:16,658 SpawnPoolWorker-36 DEBUG    upload finished in 1.232868s, attributes: file_id=546f6d626bdd
upload:  50%|████▉     | 523/1056 [01:32<01:35,  5.56it/s]2025-05-26 19:25:16,660 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/19baf1fdefd7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:17,096 SpawnPoolWorker-41 DEBUG    upload finished in 1.280091s, attributes: file_id=de2f63c19dcc
2025-05-26 19:25:17,098 SpawnPoolWorker-41 DEBUG    upload finished in 1.28287s, attributes: file_id=de2f63c19dcc
upload:  50%|████▉     | 524/1056 [01:32<02:06,  4.21it/s]2025-05-26 19:25:17,102 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/035bb48ab31d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:17,121 SpawnPoolWorker-34 DEBUG    upload finished in 1.493482s, attributes: file_id=3d5399568b61
2025-05-26 19:25:17,121 SpawnPoolWorker-34 DEBUG    upload finished in 1.49413s, attributes: file_id=3d5399568b61
2025-05-26 19:25:17,1

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
2025-05-26 19:25:17,436 SpawnPoolWorker-37 DEBUG    upload finished in 1.191271s, attributes: file_id=c32e22df4772
2025-05-26 19:25:17,436 SpawnPoolWorker-37 DEBUG    upload finished in 1.191886s, attributes: file_id=c32e22df4772
upload:  50%|█████     | 529/1056 [01:32<01:09,  7.57it/s]2025-05-26 19:25:17,439 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/aa93f01c7a7c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:17,610 SpawnPoolWork

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:17,860 SpawnPoolWorker-36 DEBUG    upload finished in 1.200665s, attributes: file_id=19baf1fdefd7
2025-05-26 19:25:17,860 SpawnPoolWorker-36 DEBUG    upload finished in 1.201254s, attributes: file_id=19baf1fdefd7
upload:  50%|█████     | 531/1056 [01:33<01:22,  6.37it/s]2025-05-26 19:25:17,862 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0cbcd5252acc.json not detected as batch file data
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:18,356 SpawnPoolWorker-41 DEBUG    upload finished in 1.255487s, attributes: file_id=035bb48ab31d
2025-05-26 19:25:18,357 SpawnPoolWorker-41 DEBUG    upload finished in 1.256543s, attributes: file_id=035bb48ab31d
upload:  50%|█████     | 532/1056 [01:33<01:54,  4.57it/s]2025-05-26 19:25:18,361 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/43bbc7c2574f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:18,415 SpawnPoolWorker-35 DEBUG    upload finished in 1.242197s, attributes: file_id=030ef20a28e1
2025-05-26 19:25:18,415 SpawnPoolWorker-35 DEBUG    upload finished in 1.242697s, attributes: file_id=030ef20a28e1
2025-05-26 19:25:18

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:18,590 SpawnPoolWorker-37 DEBUG    upload finished in 1.152114s, attributes: file_id=aa93f01c7a7c
2025-05-26 19:25:18,590 SpawnPoolWorker-37 DEBUG    upload finished in 1.1526s, attributes: file_id=aa93f01c7a7c
upload:  51%|█████     | 537/1056 [01:34<00:57,  9.00it/s]2025-05-26 19:25:18,592 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7604213daa85.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Re

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:18,946 SpawnPoolWorker-36 DEBUG    upload finished in 1.084584s, attributes: file_id=0cbcd5252acc
2025-05-26 19:25:18,946 SpawnPoolWorker-36 DEBUG    upload finished in 1.085158s, attributes: file_id=0cbcd5252acc
upload:  51%|█████     | 539/1056 [01:34<01:07,  7.68it/s]2025-05-26 19:25:18,948 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4ae1530b6b3e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:19,726 SpawnPoolWorker-38 DEBUG    upload finished in 1.20562s, attributes: file_id=89efc08f6543
2025-05-26 19:25:19,727 SpawnPoolWorker-38 DEBUG    upload finished in 1.207765s, attributes: file_id=89efc08f6543
2025-05-26 19:25:19,731 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e90dc8a5b8c5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:19,752 SpawnPoolWorker-41 DEBUG    upload finished in 1.391642s, attributes: file_id=43bbc7c2574f
2025-05-26 19:25:19,752 SpawnPoolWorker-41 DEBUG    upload finished in 1.392731s, attributes: file_id=43bbc7c2574f
upload:  51%|█████     | 541/1056 [01:35<01:47,  4.77it/s]2025-05-26 19:25:19,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:25:20,028 SpawnPoolWorker-40 DEBUG    upload finished in 1.466693s, attributes: file_id=37272a0b2b61
2025-05-26 19:25:20,028 SpawnPoolWorker-40 DEBUG    upload finished in 1.467313s, attributes: file_id=37272a0b2b61
upload:  52%|█████▏    | 544/1056 [01:35<01:24,  6.08it/s]2025-05-26 19:25:20,032 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/730956c16e1c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:20,043 SpawnPoolWorker-37 DEBUG    upload finished in 1.45222s, attributes: file_id=7604213daa85
2025-05-26 19:25:20,044 SpawnPoolWorker-37 DEBUG    upload finished in 1.453107s, attributes: file_id=7604213daa85
2025-05-26 19:25:20,050 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:21,223 SpawnPoolWorker-35 DEBUG    upload finished in 1.380298s, attributes: file_id=ded8358d281b
2025-05-26 19:25:21,224 SpawnPoolWorker-35 DEBUG    upload finished in 1.381901s, attributes: file_id=ded8358d281b
upload:  52%|█████▏    | 548/1056 [01:36<02:06,  4.03it/s]2025-05-26 19:25:21,229 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f23114ccf808.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:21,275 SpawnPoolWorker-41 DEBUG    upload finished in 1.519978s, attribut

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:25:21,574 SpawnPoolWorker-37 DEBUG    upload finished in 1.525085s, attributes: file_id=8aed506e5f1a
2025-05-26 19:25:21,575 SpawnPoolWorker-37 DEBUG    upload finished in 1.526732s, attributes: file_id=8aed506e5f1a
2025-05-26 19:25:21,579 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2fec7ed79594.json not detected as batch file data
2025-05-26 19:25:21,615 SpawnPoolWorker-39 DEBUG    upload finished in 1.466312s, attributes: file_id=30888762b692
2025-05-26 19:25:21,615 SpawnPoolWorker-39 DEBUG    upload finished in 1.466796s, attributes: file_id=30888762b692
upload:  52%|█████▏    | 553/1056 [01:37<01:20,  6.24it/s]2025-05-26 19:25:21,617 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f8e70afdbe32.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:21,868 SpawnPoolWorker-36 DEBUG    upload finished in 1.731865s, attributes: file_id=81039c414428
2025-05-26 19:25:21,868 SpawnPoolWorker-36 DEBUG    upload finished in 1.732354s, attributes: file_id=81039c414428
upload:  53%|█████▎    | 555/1056 [01:37<01:15,  6.62it/s]2025-05-26 19:25:21,870 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2ac8f99ee545.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:22,763 SpawnPoolWorker-38 DEBUG    upload finished in 1.447475s, attributes: file_id=c641300cfa2d
2025-05-26 19:25:22,764 SpawnPoolWorker-38 DEBUG    upload finished in 1.448006s, attributes: file_id=c641300cfa2d
2025-05-26 19:25:22,766 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/517a099f57f0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:22,840 SpawnPoolWorker-35 DEBUG    upload finished in 1.612869s, attributes: file_id=f23114ccf808
2025-05-26 19:25:22,841 SpawnPoolWorker-35 DEBUG    upload finished in 1.613738s, attributes: file_id=f23114ccf808
upload:  53%|█████▎    |

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:23,064 SpawnPoolWorker-37 DEBUG    upload finished in 1.486571s, attributes: file_id=2fec7ed79594
2025-05-26 19:25:23,064 SpawnPoolWorker-37 DEBUG    upload finished in 1.487275s, attributes: file_id=2fec7ed79594
upload:  53%|█████▎    | 559/1056 [01:38<01:42,  4.83it/s]2025-05-26 19:25:23,066 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7b827ad753bc.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:23,121 SpawnPoolWorker-34 DEBUG    upload finished in 1.745977s, attributes: file_id=5ebbcc2d2e7b
2025-05-26 19:25:23,121 SpawnPoolWorker-34 DEBUG    upload finished in 1.746507s, attributes: file_id=5ebbcc2d2e7b
2025-05-26 19:25:23

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:23,347 SpawnPoolWorker-36 DEBUG    upload finished in 1.478062s, attributes: file_id=2ac8f99ee545
2025-05-26 19:25:23,348 SpawnPoolWorker-36 DEBUG    upload finished in 1.478505s, attributes: file_id=2ac8f99ee545
2025-05-26 19:25:23,349 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0be7ef37a526.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:23,439 SpawnPoolWorker-39 DEBUG    upload finished in 1.823311s, attributes: file_id=f8e70afdbe32
2025-05-26 19:25:23,440 SpawnPoolWorker-39 DEBUG    upload finished in 1.823743s, attributes: file_id=f8e70afdbe32
upload:  53%|█████▎    | 563/1056 [01:38<01:14,  6.64it/s]2025-05-26 19:25:23

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:24,133 SpawnPoolWorker-38 DEBUG    upload finished in 1.368357s, attributes: file_id=517a099f57f0
2025-05-26 19:25:24,135 SpawnPoolWorker-38 DEBUG    upload finished in 1.370377s, attributes: file_id=517a099f57f0
upload:  53%|█████▎    | 564/1056 [01:39<02:00,  4.08it/s]2025-05-26 19:25:24,138 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1c9488cdf35d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:24,266 SpawnPoolWorker-41 DEBUG    upload finished in 1.349671s, attributes: file_id=097a8ccc619f
2025-05-26 19:25:24,267 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:24,386 SpawnPoolWorker-35 DEBUG    upload finished in 1.543887s, attributes: file_id=a3df2537c6e4
2025-05-26 19:25:24,386 SpawnPoolWorker-35 DEBUG    upload finished in 1.544489s, attributes: file_id=a3df2537c6e4
upload:  54%|█████▎    | 566/1056 [01:39<01:37,  5.01it/s]2025-05-26 19:25:24,388 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d17da98700a5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:24,473 SpawnPoolWorker-37 DEBUG    upload finished in 1.407605s, attributes: file_id=7b827ad753bc
2025-05-26 19:25:24,474 SpawnPoolWorker-37 DEBUG    upload finished in 1.408393s, attributes: file_id=7b827ad753bc
2025-05-26 19:25:24

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:24,740 SpawnPoolWorker-40 DEBUG    upload finished in 1.485305s, attributes: file_id=dce3452728c5
2025-05-26 19:25:24,740 SpawnPoolWorker-40 DEBUG    upload finished in 1.485787s, attributes: file_id=dce3452728c5
2025-05-26 19:25:24,741 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ab51e0dce31d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:24,789 SpawnPoolWorker-36 DEBUG    upload finished in 1.439693s, attributes: file_id=0be7ef37a526
2025-05-26 19:25:24,789 SpawnPoolWorker-36 DEBUG    upload finished in 1.44024s, attri

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:25,439 SpawnPoolWorker-38 DEBUG    upload finished in 1.301142s, attributes: file_id=1c9488cdf35d
2025-05-26 19:25:25,440 SpawnPoolWorker-38 DEBUG    upload finished in 1.30273s, attributes: file_id=1c9488cdf35d
upload:  54%|█████▍    | 572/1056 [01:40<01:39,  4.87it/s]2025-05-26 19:25:25,445 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5c7fc356f859.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:25,630 SpawnPoolWorker-35 DEBUG    upload finished in 1.242195s, attributes: file_id=d17da98700a5
2025-05-26 19:25:25,631 Spawn

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:25,714 SpawnPoolWorker-41 DEBUG    upload finished in 1.444009s, attributes: file_id=3bb5ab93ad8f
2025-05-26 19:25:25,714 SpawnPoolWorker-41 DEBUG    upload finished in 1.444426s, attributes: file_id=3bb5ab93ad8f
2025-05-26 19:25:25,716 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bc436f5f8c2d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:25,768 SpawnPoolWorker-34 DEBUG    upload finished in 1.253553s, attributes: file_id=5cb75b78a9f6
2025-05-26 19:25:25,768 SpawnPoolWorker-34 DEBUG    upload finished in 1.254159s, attributes: file_id=5cb75b78a9f6
upload:  54%|█████▍    | 575/1056 [01:41<01:14,  6.47it/s]2025-05-26 19:25:25,771 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:25,981 SpawnPoolWorker-36 DEBUG    upload finished in 1.190801s, attributes: file_id=e76cc018e624
upload:  55%|█████▍    | 577/1056 [01:41<01:06,  7.22it/s]2025-05-26 19:25:25,981 SpawnPoolWorker-36 DEBUG    upload finished in 1.191524s, attributes: file_id=e76cc018e624
2025-05-26 19:25:25,985 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/dc99f261ab30.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:26,076 SpawnPoolWorker-40 DEBUG    upload finished in 1.335459s, attributes: file_id=ab51e0dce31d
2025-05-26 19:25:26,077 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:26,238 SpawnPoolWorker-39 DEBUG    upload finished in 1.413397s, attributes: file_id=29e510ff7baa
2025-05-26 19:25:26,239 SpawnPoolWorker-39 DEBUG    upload finished in 1.41386s, attributes: file_id=29e510ff7baa
upload:  55%|█████▍    | 579/1056 [01:41<01:04,  7.40it/s]2025-05-26 19:25:26,240 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2471f4ef10cf.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:26,747 SpawnPoolWorker-38 DEBUG    upload finished in 1.303709s, attributes: file_id=5c7fc356f859
2025-05-26 19:25:26,747 SpawnPoolWorker-38 DEBUG    upload finished in 1.304537s, attributes: file_id=5c7fc356f859
upload:  55%|█████▍    | 580/1056 [01:42<01:37,  4.87it/s]2025-05-26 19:25:26,749 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2ad8f228b711.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:27,003 SpawnPoolWorker-34 DEBUG    upload finished in 1.23388s, attributes: file_id=ad8d835da2eb
2025-05-26 19:25:27,004 SpawnPoolWorker-34 DEBUG    upload finished in 1.234367s, attributes: file_id=ad8d835da2eb
upload:  55%|█████▌    | 581/1056 [01:42<01:42,  4.62it/s]2025-05-26 19:25:27,005 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ec415775b8bf.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:27,143 SpawnPoolWorker-35 DEBUG    upload finished in 1.506868s, attributes: file_id=26c83189fdda
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:27,144 SpawnPoolWorker-35 DEBUG    upload finished in 1.509064s, attributes: file_id=26c83189fdda
upload:  55%|█████▌ 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:27,212 SpawnPoolWorker-37 DEBUG    upload finished in 1.370926s, attributes: file_id=abe9d7488ac0
2025-05-26 19:25:27,214 SpawnPoolWorker-37 DEBUG    upload finished in 1.372213s, attributes: file_id=abe9d7488ac0
2025-05-26 19:25:27,216 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b4c256ef5454.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:27,276 SpawnPoolWorker-41 DEBUG    upload finished in 1.560714s, attributes: file_id=bc436f5f8c2d
2025-05-26 19:25:27,276 SpawnPoolWorker-41 DEBUG    upload finished in 1.561302s, attributes: file_id=bc436f5f8c2d
upload:  55%|█████▌    | 584/1056 [01:42<01:08,  6.86it/s]2025-05-26 19:25:27,277 SpawnPoolWorker-36 DEBUG    upload finished in 1

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:27,601 SpawnPoolWorker-39 DEBUG    upload finished in 1.361229s, attributes: file_id=2471f4ef10cf
2025-05-26 19:25:27,601 SpawnPoolWorker-39 DEBUG    upload finished in 1.361762s, attributes: file_id=2471f4ef10cf
upload:  56%|█████▌    | 587/1056 [01:43<01:00,  7.78it/s]2025-05-26 19:25:27,603 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f1c4a3f0444f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:28,235 SpawnPoolWorker-38 DEBUG    upload finished in 1.486527s, attributes: file_id=2ad8f228b711
2025-05-26 19:25:28,235 SpawnPoolWorker-38 DEBUG    upload finished in 1.48717s, attributes: file_id=2ad8f228b711
upload:  56%|█████▌    | 588/1056 [01:43<01:42,  4.55it/s]2025-05-26 19:25:28,240 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/45f6caa72711.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:28,588 SpawnPoolWorker-34 DEBUG    upload finished in 1.583073s, attributes: file_id=ec415775b8bf
2025-05-26 19:25:28,588 SpawnPoolWorker-34 DEBUG    upload finished in 1.583508s, attributes: file_id=ec415775b8bf
upload:  56%|█████▌    | 589/1056 [01:44<01:55,  4.05it/s]2025-05-26 19:25:28,590 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c55a62087e22.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:28,665 SpawnPoolWorker-36 DEBUG    upload finished in 1.386407s, attributes: file_id=e658e9febf53
2025-05-26 19:25:28,665 SpawnPoolWorker-36 DEBUG    upload finished in 1.386857s, attributes: file_id=e658e9febf53
2025-05-26 19:25:28

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:28,825 SpawnPoolWorker-41 DEBUG    upload finished in 1.547639s, attributes: file_id=9d6ab5443f0c
2025-05-26 19:25:28,826 SpawnPoolWorker-41 DEBUG    upload finished in 1.54822s, attributes: file_id=9d6ab5443f0c
upload:  56%|█████▌    | 591/1056 [01:44<01:32,  5.01it/s]2025-05-26 19:25:28,828 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8040f3edee8a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:28,959 SpawnPoolWorker-35 DEBUG    upload finished in 1.813212s, attributes: file_id=ef057944d665
2025-05-26 19:25:28,959 SpawnPoolWorker-35 DEBUG    upload finished in 1.813746s, attributes: file_id=ef057944d665
upload:  56%|█████▌    | 592/1056 [01:44<01:26,  5.38it/s]2025-05-26 19:2

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:29,062 SpawnPoolWorker-37 DEBUG    upload finished in 1.846779s, attributes: file_id=b4c256ef5454
2025-05-26 19:25:29,063 SpawnPoolWorker-37 DEBUG    upload finished in 1.847311s, attributes: file_id=b4c256ef5454
upload:  56%|█████▌    | 593/1056 [01:44<01:17,  5.99it/s]2025-05-26 19:25:29,064 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b2a268ec0d31.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:29,178 SpawnPoolWorker-40 DEBUG    upload finished in 1.823814s, attributes: file_id=1603480cd4d0
2025-05-26 19:25:29,178 SpawnPoolWorker-40 DEBUG    upload finished in 1.824328s, attributes: file_id=1603480cd4d0
upload:  56%|█████▋

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:29,405 SpawnPoolWorker-39 DEBUG    upload finished in 1.802887s, attributes: file_id=f1c4a3f0444f
2025-05-26 19:25:29,406 SpawnPoolWorker-39 DEBUG    upload finished in 1.803364s, attributes: file_id=f1c4a3f0444f
upload:  56%|█████▋    | 595/1056 [01:44<01:19,  5.77it/s]2025-05-26 19:25:29,408 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4cfddaa612de.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:29,719 SpawnPoolWorker-38 DEBUG    upload finished in 1.480556s, attributes: file_id=45f6caa72711
2025-05-26 19:25:29,720 SpawnPoolWorker-38 DEBUG    upload finished in 1.481224s, attributes: file_id=45f6caa72711
upload:  56%|█████▋    | 596/1056 [01:45<01:37,  4.73it/s]2025-05-26 19:25:29,723 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1d66f84c32db.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:30,417 SpawnPoolWorker-36 DEBUG    upload finished in 1.750673s, attributes: file_id=9ef512be7678
2025-05-26 19:25:30,417 SpawnPoolWorker-36 DEBUG    upload finished in 1.751065s, attributes: file_id=9ef512be7678
upload:  57%|█████▋    | 597/1056 [01:45<02:39,  2.88it/s]2025-05-26 19:25:30,419 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ab1838ddf95f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:30,479 SpawnPoolWorker-34 DEBUG    upload finished in 1.890019s, attributes: file_id=c55a62087e22
2025-05-26 19:25:30,480 SpawnPoolWorker-34 DEBUG    upload finished in 1.890513s, attributes: file_id=c55a62087e22
2025-05-26 19:25:30

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:30,625 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ce5011b9c799.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:30,650 SpawnPoolWorker-37 DEBUG    upload finished in 1.586623s, attributes: file_id=b2a268ec0d31
2025-05-26 19:25:30,651 SpawnPoolWorker-37 DEBUG    upload finished in 1.587278s, attributes: file_id=b2a268ec0d31
2025-05-26 19:25:30,653 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1c6ad10ce4d1.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:30,986 SpawnPoolWorker-40 DEBUG    upload finished in 1.806355s, attributes: file_id=66b1a5f1c7f6
2025-05-26 19:25:30,986 SpawnPoolWorker-40 DEBUG    upload finished in 1.806909s, attributes: file_id=66b1a5f1c7f6
upload:  57%|█████▋    | 602/1056 [01:46<01:24,  5.39it/s]2025-05-26 19:25:30,988 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/99061dc78f2e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:31,168 SpawnPoolWork

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:31,193 SpawnPoolWorker-39 DEBUG    upload finished in 1.786439s, attributes: file_id=4cfddaa612de
2025-05-26 19:25:31,194 SpawnPoolWorker-39 DEBUG    upload finished in 1.787049s, attributes: file_id=4cfddaa612de
2025-05-26 19:25:31,195 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e90958bf9294.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:32,084 SpawnPoolWorker-36 DEBUG    upload finished in 1.666139s, attributes: file_id=ab1838ddf95f
2025-05-26 19:25:32,085 SpawnPoolWorker-36 DEBUG    upload finished in 1.667017s, attributes: file_id=ab1838ddf95f
upload:  57%|█████▋    | 605/1056 [01:47<02:13,  3.39it/s]2025-05-26 19:25:32,087 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4ff2ec8d9664.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:32,106 SpawnPoolWorker-37 DEBUG    upload finished in 1.453499s, attributes: file_id=1c6ad10ce4d1
2025-05-26 19:25:32,106 SpawnPoolWorker-37 DEBUG    upload finished in 1.454s, at

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:32,370 SpawnPoolWorker-41 DEBUG    upload finished in 1.627997s, attributes: file_id=56995d3f35db
2025-05-26 19:25:32,370 SpawnPoolWorker-41 DEBUG    upload finished in 1.628507s, attributes: file_id=56995d3f35db
2025-05-26 19:25:32,373 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/84583039794c.json not detected as batch file data
2025-05-26 19:25:32,484 SpawnPoolWorker-40 DEBUG    upload finished in 1.495836s, attributes: file_id=99061dc78f2e
2025-05-26 19:25:32,484 SpawnPoolWorker-40 DEBUG    upload finished in 1.496786s, attributes: file_id=99061dc78f2e
upload:  58%|█████▊    | 610/1056 [01:47<01:12,  6.17it/s]2025-05-26 19:25:32,488 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:32,781 SpawnPoolWorker-38 DEBUG    upload finished in 1.611912s, attributes: file_id=4710f67019c7
2025-05-26 19:25:32,781 SpawnPoolWorker-38 DEBUG    upload finished in 1.612418s, attributes: file_id=4710f67019c7
upload:  58%|█████▊    | 611/1056 [01:48<01:22,  5.37it/s]2025-05-26 19:25:32,783 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/51d5a1505a5a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:32,869 SpawnPoolWorker-39 DEBUG    upload finished in 1.674541s, attributes: file_id=e90958bf9294
2025-05-26 19:25:32,870 SpawnPoolWorker-39 DEBUG    upload finished in 1.675004s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:33,445 SpawnPoolWorker-36 DEBUG    upload finished in 1.357842s, attributes: file_id=4ff2ec8d9664
2025-05-26 19:25:33,445 SpawnPoolWorker-36 DEBUG    upload finished in 1.358635s, attributes: file_id=4ff2ec8d9664
upload:  58%|█████▊    | 613/1056 [01:48<01:44,  4.24it/s]2025-05-26 19:25:33,448 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/74bc29a34b39.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:33,640 SpawnPoolWorker-37 DEBUG    upload finished in 1.533387s, attributes: file_id=bf6459598024
2025-05-26 19:25:33,641 SpawnPoolWorker-37 DEBUG    upload finished in 1.534564s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:33,743 SpawnPoolWorker-34 DEBUG    upload finished in 1.559859s, attributes: file_id=4afd344c4a17
2025-05-26 19:25:33,743 SpawnPoolWorker-34 DEBUG    upload finished in 1.560375s, attributes: file_id=4afd344c4a17
upload:  58%|█████▊    | 615/1056 [01:49<01:28,  4.99it/s]2025-05-26 19:25:33,745 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/721d0a5f0f30.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:33,970 SpawnPoolWorker-35 DEBUG    upload finished in 1.68407s, attributes: file_id=55980119847c
2025-05-26 19:25:33,970 SpawnPoolWorker-35 DEBUG    upload finished in 1.684985s, attributes: file_id=55980119847c
upload:  59%|█████▊    | 618/1056 [01:49<01:00,  7.24it/s]2025-05-26 19:25:33,972 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/442cac5437ce.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:34,018 SpawnPoolWorker-39 DEBUG    upload finished in 1.146864s, attributes: file_id=b68523049763
2025-05-26 19:25:34,018 SpawnPoolWorker-39 DEBUG    upload finished in 1.14731s, attributes: file_id=b68523049763
2025-05-26 19:25:34,0

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:34,725 SpawnPoolWorker-36 DEBUG    upload finished in 1.278295s, attributes: file_id=74bc29a34b39
2025-05-26 19:25:34,726 SpawnPoolWorker-36 DEBUG    upload finished in 1.279291s, attributes: file_id=74bc29a34b39
2025-05-26 19:25:34,730 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b431fa3b0fd6.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:35,062 SpawnPoolWorker-37 DEBUG    upload finished in 1.418055s, attributes: file_id=8ba353d17de5
2025-05-26 19:25:35,062 SpawnPoolWorker-37 DEBUG    upload finished in 1.418814s, attributes: file_id=8ba353d17de5
upload:  59%|█████▉    | 622/1056 [01:50<01:43,  4.19it/s]2025-05-26 19:25:35,064 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a61c02c25fc8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:35,222 SpawnPoolWorker-34 DEBUG    upload finished in 1.477278s, attributes: file_id=721d0a5f0f30
2025-05-26 19:25:35,222 SpawnPoolWorker-34 DEBUG    upload finished in 1.478275s, attributes: file_id=721d0a5f0f30
upload:  59%|█████▉    | 623/1056 [01:50<01:36,  4.49it/s]2025-05-26 19:

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:35,269 SpawnPoolWorker-40 DEBUG    upload finished in 1.40554s, attributes: file_id=8071805ca6b9
2025-05-26 19:25:35,269 SpawnPoolWorker-40 DEBUG    upload finished in 1.405956s, attributes: file_id=8071805ca6b9
2025-05-26 19:25:35,270 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/cc840e2f852a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:35,459 SpawnPoolWorker-41 DEBUG    upload finished in 1.6186s, attributes: file_id=0f526aed1a44
2025-05-26 19:25:35,459 SpawnPoolWorker-41 DEBUG    upload finished in 1.61946s, attributes: file_id=0f526aed1a44
upload:  59%|█████▉    | 625/1056 [01:50<01:19,  5.41it/s]2025-05-26 19:25:35,462

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:35,599 SpawnPoolWorker-39 DEBUG    upload finished in 1.580213s, attributes: file_id=32bd77d320da
2025-05-26 19:25:35,600 SpawnPoolWorker-39 DEBUG    upload finished in 1.581078s, attributes: file_id=32bd77d320da
upload:  59%|█████▉    | 626/1056 [01:51<01:15,  5.69it/s]2025-05-26 19:25:35,603 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2012cdb0438e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:35,669 SpawnPoolWorker-35 DEBUG    upload finished in 1.697144s, attributes: file_id=442cac5437ce
2025-05-26 19:25:35,669 SpawnPoolWorker-35 DEBUG    upload finished in 1.697883s, attributes: file_id=442cac5437ce
2025-05-26 19:25:35

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:36,461 SpawnPoolWorker-36 DEBUG    upload finished in 1.729432s, attributes: file_id=b431fa3b0fd6
2025-05-26 19:25:36,473 SpawnPoolWorker-36 DEBUG    upload finished in 1.744277s, attributes: file_id=b431fa3b0fd6
upload:  60%|█████▉    | 629/1056 [01:51<01:50,  3.86it/s]2025-05-26 19:25:36,516 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/654ae7a78cfb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:36,772 SpawnPoolWorker-37 DEBUG    upload finished in 1.708248s, attributes: file_id=a61c02c25fc8
2025-05-26 19:25:36,773 SpawnPoolWorker-37 DEBUG    upload finished in 1.709215s, attributes: file_id=a61c02c25fc8
upload:  60%|█████▉    | 630/1056 [01:52<01:53,  3.74it/s]2025-05-26 19:25:36,810 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/524085e71231.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:36,962 SpawnPoolWorker-34 DEBUG    upload finished in 1.737113s, attributes: file_id=ae57232e8c0b
2025-05-26 19:25:36,963 SpawnPoolWorker-34 DEBUG    upload finished in 1.738915s, attributes: file_id=ae57232e8c0b
upload:  60%|█████▉    | 631/1056 [01:52<01:45,  4.03it/s]2025-05-26 19:

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:37,124 SpawnPoolWorker-35 DEBUG    upload finished in 1.452909s, attributes: file_id=a4e69696c68f
2025-05-26 19:25:37,124 SpawnPoolWorker-35 DEBUG    upload finished in 1.453639s, attributes: file_id=a4e69696c68f
upload:  60%|█████▉    | 632/1056 [01:52<01:35,  4.44it/s]2025-05-26 19:25:37,127 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ca47f937a531.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:37,148 SpawnPoolWorker-40 DEBUG    upload finished in 1.877851s, attributes: file_id=cc840e2f852a
2025-05-26 19:25:37,149 SpawnPoolWorker-40 DEBUG    upload finished in 1.879117s, attributes: file_id=cc840e2f852a
2025-05-26 19:25:37

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
2025-05-26 19:25:37,510 SpawnPoolWorker-41 DEBUG    upload finished in 2.04869s, attributes: file_id=f6c6bbf97eb6
2025-05-26 19:25:37,511 SpawnPoolWorker-41 DEBUG    upload finished in 2.050602s, attributes: file_id=f6c6bbf97eb6
upload:  60%|██████    | 636/1056 [01:53<01:00,  6.97it/s]Removed trailing semicolon and whitespace from query
2025-05-26 19:25:37,514 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8b34d0774bca.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:38,095 SpawnPoolWorker-36 DEBUG    upload finished in 1.579005s, attributes: file_id=654ae7a78cfb
upload:  60%|██████    | 637/1056 [01:53<01:39,  4.21it/s]2025-05-26 19:25:38,095 SpawnPoolWorker-36 DEBUG    upload finished in 1.581287s, attributes: file_id=654ae7a78cfb
2025-05-26 19:25:38,097 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1f54ed593cbe.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:38,478 SpawnPoolWorker-37 DEBUG    upload finished in 1.668393s, attributes: file_id=524085e71231
upload:  60%|██████    | 638/1056 [01:53<01:53,  3.68it/s]2025-05-26 19:25:38,478 SpawnPoolWorker-37 DEBUG    upload finished in 1.672957s, attributes: file_id=524085e71231
2025-05-26 19:25:38,481 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1111f4ceb69e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:38,634 SpawnPoolWorker-34 DEBUG    upload finished in 1.662352s, attributes: file_id=189de956498a
2025-05-26 19:25:38,635 SpawnPoolWorker-34 DEBUG    upload finished in 1.663962s, attributes: file_id=189de956498a
upload:  61%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:38,840 SpawnPoolWorker-40 DEBUG    upload finished in 1.689226s, attributes: file_id=12c4c228fad2
2025-05-26 19:25:38,841 SpawnPoolWorker-40 DEBUG    upload finished in 1.689883s, attributes: file_id=12c4c228fad2
upload:  61%|██████    | 640/1056 [01:54<01:36,  4.29it/s]2025-05-26 19:25:38,843 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ba9b26ca250c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:39,051 SpawnPoolWorker-39 DEBUG    upload finished in 1.748403s, attributes: file_id=a89dc0344021
2025-05-26 19:25:39,056 SpawnPoolWorker-39 DEBUG    upload finished in 1.75545s, attributes: file_id=a89dc0344021
upload:  61%|██████    | 641/1056 [01:54<01:34,  4.37it/s]Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:39,064 SpawnPoolWorker-35 DEBUG    upload finished in 1.937923s, attributes: file_id=ca47f937a531
2025-05-26 19:25:39,065 SpawnPoolWorker-35 DEBUG    upload finished in 1.939546s, attributes: file_id=ca47f937a531
2025-05-26 19:25:39,069 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c515785624c2.json not detected as batch file data
2025-05-26 19:25:39,070 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/489058ed7d25.json not detected as batch file data
A value is trying to be set on a copy of a slice from a

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:39,500 SpawnPoolWorker-36 DEBUG    upload finished in 1.403584s, attributes: file_id=1f54ed593cbe
2025-05-26 19:25:39,501 SpawnPoolWorker-36 DEBUG    upload finished in 1.404506s, attributes: file_id=1f54ed593cbe
2025-05-26 19:25:39,503 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bbc3b36ac8c3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:40,232 SpawnPoolWorker-34 DEBUG    upload finished in 1.594357s, attributes: file_id=61455c2d980a
2025-05-26 19:25:40,232 SpawnPoolWorker-34 DEBUG    upload finished in 1.594973s, attributes: file_id=61455c2d980a
upload:  61%|██████    | 646/1056 [01:55<01:48,  3.79it/s]2025-05-26 19:25:40,234 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e7b8e96adbe3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:40,261 SpawnPoolWorker-37 DEBUG    upload finished in 1.781482s, attributes: file_id=1111f4ceb69e
2025-05-26 19:25:40,261 SpawnPoolWorker-37 DEBUG    upload finished in 1.781972s, attributes: file_id=1111f4ceb69e
2025-05-26 19:25:40

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:40,720 SpawnPoolWorker-38 DEBUG    upload finished in 1.575785s, attributes: file_id=7432f8a94e2d
2025-05-26 19:25:40,721 SpawnPoolWorker-38 DEBUG    upload finished in 1.576843s, attributes: file_id=7432f8a94e2d
upload:  61%|██████▏   | 649/1056 [01:56<01:30,  4.52it/s]2025-05-26 19:25:40,724 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/09aff50321c6.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:40,818 SpawnPoolWorker-39 DEBUG    upload finished in 1.75163s, attributes: file_id=c515785624c2
2025-05-26 19:25:40,818 Spawn

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:40,991 SpawnPoolWorker-36 DEBUG    upload finished in 1.488846s, attributes: file_id=bbc3b36ac8c3
2025-05-26 19:25:40,992 SpawnPoolWorker-36 DEBUG    upload finished in 1.489412s, attributes: file_id=bbc3b36ac8c3
upload:  62%|██████▏   | 652/1056 [01:56<01:08,  5.89it/s]2025-05-26 19:25:40,994 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ce95a75dc7c1.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:41,014 SpawnPoolWorker-35 DEBUG    upload finished in 1.944403s, attributes: file_id=489058ed7d25
2025-05-26 19:25:41,014 SpawnPoolWorker-35 DEBUG    upload finished in 1.945024s, attributes: file_id=489058ed7d25
2025-05-26 19:25:41,016 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:41,776 SpawnPoolWorker-37 DEBUG    upload finished in 1.513058s, attributes: file_id=66f0125736de
2025-05-26 19:25:41,776 SpawnPoolWorker-40 DEBUG    upload finished in 1.472598s, attributes: file_id=a274fb1b63aa
2025-05-26 19:25:41,776 SpawnPoolWorker-37 DEBUG    upload finished in 1.513894s, attributes: file_id=66f0125736de
2025-05-26 19:25:41,776 SpawnPoolWorker-40 DEBUG    upload finished in 1.473363s, attributes: file_id=a274fb1b63aa
upload:  62%|██████▏   | 654/1056 [01:57<01:37,  4.13it/s]2025-05-26 19:25:41,781 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/cd660b06d0a5.json not detected as batch file data
2025-05-26 19:25:41,781 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ac2c85afcb54.json not detected as batch file data
A

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:42,212 SpawnPoolWorker-38 DEBUG    upload finished in 1.489134s, attributes: file_id=09aff50321c6
2025-05-26 19:25:42,213 SpawnPoolWorker-38 DEBUG    upload finished in 1.489585s, attributes: file_id=09aff50321c6
upload:  62%|██████▏   | 657/1056 [01:57<01:20,  4.94it/s]2025-05-26 19:25:42,215 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ef534e2d9dea.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:42,278 SpawnPoolWorker-35 DEBUG    upload finished in 1.262512s, attributes: file_id=7f67c18a6a18
2025-05-26 19:25:42,278 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:25:42,570 SpawnPoolWorker-36 DEBUG    upload finished in 1.576639s, attributes: file_id=ce95a75dc7c1
2025-05-26 19:25:42,570 SpawnPoolWorker-36 DEBUG    upload finished in 1.577248s, attributes: file_id=ce95a75dc7c1
2025-05-26 19:25:42,572 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c35e1e475e81.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:43,224 SpawnPoolWorker-34 DEBUG    upload finished in 1.351498s, attributes: file_id=7a6cb7389a16
2025-05-26 19:25:43,225 SpawnPoolWorker-34 DEBUG    upload finished in 1.352921s, attributes: file_id=7a6cb7389a16
upload:  63%|██████▎   | 662/1056 [01:58<01:27,  4.53it/s]2025-05-26 19:25:43,230 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/13793f4c3f42.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:43,323 SpawnPoolWorker-40 DEBUG    upload finished in 1.542913s, attributes: file_id=ac2c85afcb54
2025-05-26 19:25:43,323 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:43,740 SpawnPoolWorker-35 DEBUG    upload finished in 1.46095s, attributes: file_id=c1c40f053f7a
2025-05-26 19:25:43,740 SpawnPoolWorker-35 DEBUG    upload finished in 1.461583s, attributes: file_id=c1c40f053f7a
upload:  63%|██████▎   | 665/1056 [01:59<01:22,  4.75it/s]2025-05-26 19:25:43,744 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5f55c73084f7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:43,842 SpawnPoolWorker-41 DEBUG    upload finished in 1.531862s, attributes: file_id=69381daa3222
2025-05-26 19:25:43,843 Spawn

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:43,981 SpawnPoolWorker-39 DEBUG    upload finished in 1.631957s, attributes: file_id=3fa187299984
2025-05-26 19:25:43,982 SpawnPoolWorker-39 DEBUG    upload finished in 1.632689s, attributes: file_id=3fa187299984
upload:  63%|██████▎   | 667/1056 [01:59<01:09,  5.57it/s]2025-05-26 19:25:43,985 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ed194552da58.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:44,069 SpawnPoolWorker-36 DEBUG    upload finished in 1.497178s, attributes: file_id=c35e1e475e81
2025-05-26 19:25:44,069 SpawnPoolWorker-36 DEBUG    upload finished in 1.497788s, attributes: file_id=c35e1e475e81
2025-05-26 19:25:44,071 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:44,633 SpawnPoolWorker-34 DEBUG    upload finished in 1.405129s, attributes: file_id=13793f4c3f42
2025-05-26 19:25:44,634 SpawnPoolWorker-34 DEBUG    upload finished in 1.406461s, attributes: file_id=13793f4c3f42
upload:  63%|██████▎   | 670/1056 [02:00<01:23,  4.65it/s]2025-05-26 19:25:44,639 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ec0771d845c0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:44,793 SpawnPoolWorker-40 DEBUG    upload finished in 1.466808s, attribut

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:45,033 SpawnPoolWorker-37 DEBUG    upload finished in 1.645474s, attributes: file_id=697093314f6c
2025-05-26 19:25:45,034 SpawnPoolWorker-37 DEBUG    upload finished in 1.646153s, attributes: file_id=697093314f6c
upload:  64%|██████▎   | 672/1056 [02:00<01:21,  4.73it/s]2025-05-26 19:25:45,036 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/aa5dcc70c822.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:45,404 SpawnPoolWorker-41 DEBUG    upload finished in 1.559153s, attributes: file_id=215701687ebd
2025-05-26 19:25:45,407 SpawnPoolWorker-41 DEBUG    upload finished in 1.562193s, attributes: file_id=215701687ebd
upload:  64%|██████▎   | 673/1056 [02:00<01:37,  3.94it/s]2025-05-26 19:25:45,412 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ba6b757a9645.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:45,549 SpawnPoolWorker-35 DEBUG    upload finished in 1.806388s, attributes: file_id=5f55c73084f7
2025-05-26 19:25:45,550 SpawnPoolWorker-35 DEBUG    upload finished in 1.807135s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:45,683 SpawnPoolWorker-38 DEBUG    upload finished in 1.518868s, attributes: file_id=e938bf9eafb4
2025-05-26 19:25:45,683 SpawnPoolWorker-38 DEBUG    upload finished in 1.519554s, attributes: file_id=e938bf9eafb4
upload:  64%|██████▍   | 676/1056 [02:01<00:58,  6.46it/s]2025-05-26 19:25:45,685 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4d325567140f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:45,704 SpawnPoolWorker-36 DEBUG    upload finished in 1.634378s, attributes: file_id=54a11f63b084
2025-05-26 19:25:45,705 SpawnPoolWorker-36 DEBUG    upload finished in 1.634848s, attributes: file_id=54a11f63b084
2025-05-26 19:25:45,706 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:46,268 SpawnPoolWorker-34 DEBUG    upload finished in 1.630501s, attributes: file_id=ec0771d845c0
2025-05-26 19:25:46,269 SpawnPoolWorker-34 DEBUG    upload finished in 1.631918s, attributes: file_id=ec0771d845c0
upload:  64%|██████▍   | 678/1056 [02:01<01:18,  4.81it/s]2025-05-26 19:25:46,270 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/12fbe226b282.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:46,469 SpawnPoolWorker-40 DEBUG    upload finished in 1.672698s, attributes: file_id=323c1fdfcf59
2025-05-26 19:25:46,470 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:46,476 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/53b282a1313a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:46,601 SpawnPoolWorker-37 DEBUG    upload finished in 1.565648s, attributes: file_id=aa5dcc70c822
2025-05-26 19:25:46,602 SpawnPoolWorker-37 DEBUG    upload finished in 1.566839s, attributes: file_id=aa5dcc70c822
upload:  64%|██████▍   | 680/1056 [02:02<01:11,  5.29it/s]2025-05-26 19:25:46,605 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ccae9134b7fd.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-doc

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:46,804 SpawnPoolWorker-35 DEBUG    upload finished in 1.252898s, attributes: file_id=28a2eb664c3e
2025-05-26 19:25:46,804 SpawnPoolWorker-35 DEBUG    upload finished in 1.253384s, attributes: file_id=28a2eb664c3e
upload:  64%|██████▍   | 681/1056 [02:02<01:12,  5.20it/s]2025-05-26 19:25:46,806 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2afe03e157c3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:46,866 SpawnPoolWorker-41 DEBUG    upload finished in 1.456576s, attributes: file_id=ba6b757a9645
2025-05-26 19:25:46,870 SpawnPoolWorker-41 DEBUG    upload finished in 1.460808s, attributes: file_id=ba6b757a9645
2025-05-26 19:25:46

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:47,082 SpawnPoolWorker-39 DEBUG    upload finished in 1.522217s, attributes: file_id=768e1b81c5bd
2025-05-26 19:25:47,082 SpawnPoolWorker-39 DEBUG    upload finished in 1.522753s, attributes: file_id=768e1b81c5bd
upload:  65%|██████▍   | 683/1056 [02:02<01:03,  5.88it/s]2025-05-26 19:25:47,083 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9d4fd7d355de.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:47,089 SpawnPoolWorker-38 DEBUG    upload finished in 1.403675s, attributes: file_id=4d325567140f
2025-05-26 19:25:47,089 SpawnPoolWorker-38 DEBUG    upload finished in 1.404184s, attributes: file_id=4d325567140f
2025-05-26 19:25:47,090 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:47,697 SpawnPoolWorker-34 DEBUG    upload finished in 1.427055s, attributes: file_id=12fbe226b282
2025-05-26 19:25:47,698 SpawnPoolWorker-34 DEBUG    upload finished in 1.427695s, attributes: file_id=12fbe226b282
upload:  65%|██████▍   | 686/1056 [02:03<01:15,  4.88it/s]2025-05-26 19:25:47,700 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d03c9bd27c7a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:47,820 SpawnPoolWorker-40 DEBUG    upload finished in 1.344059s, attributes: file_id=53b282a1313a
2025-05-26 19:25:47,820 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:48,324 SpawnPoolWorker-37 DEBUG    upload finished in 1.720216s, attributes: file_id=ccae9134b7fd
2025-05-26 19:25:48,324 SpawnPoolWorker-37 DEBUG    upload finished in 1.720897s, attributes: file_id=ccae9134b7fd
upload:  65%|██████▌   | 688/1056 [02:03<01:37,  3.77it/s]2025-05-26 19:25:48,327 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d3cc26c31837.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:48,365 SpawnPoolWorker-41 DEBUG    upload finished in 1.490434s, attributes: file_id=fda088c77d17
2025-05-26 19:25:48,366 SpawnPoolWorker-41 DEBUG    upload finished in 1.491608s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:48,573 SpawnPoolWorker-38 DEBUG    upload finished in 1.483656s, attributes: file_id=7cceb2ff086c
2025-05-26 19:25:48,574 SpawnPoolWorker-38 DEBUG    upload finished in 1.484102s, attributes: file_id=7cceb2ff086c
upload:  65%|██████▌   | 691/1056 [02:04<01:02,  5.84it/s]2025-05-26 19:25:48,575 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c5f39657525c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:49,018 SpawnPoolWorker-39 DEBUG    upload finished in 1.934571s, attributes: file_id=9d4fd7d355de
2025-05-26 19:25:49,018 SpawnPoolWorker-39 DEBUG    upload finished in 1.935356s, attributes: file_id=9d4fd7d355de
upload:  66%|██████▌   | 692/1056 [02:04<01:21,  4.44it/s]2025-05-26 19:25:49,021 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1f8ce65c54b5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:49,112 SpawnPoolWorker-36 DEBUG    upload finished in 1.891063s, attribut

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:49,351 SpawnPoolWorker-34 DEBUG    upload finished in 1.651459s, attributes: file_id=d03c9bd27c7a
2025-05-26 19:25:49,351 SpawnPoolWorker-34 DEBUG    upload finished in 1.652429s, attributes: file_id=d03c9bd27c7a
upload:  66%|██████▌   | 694/1056 [02:04<01:13,  4.90it/s]2025-05-26 19:25:49,355 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/397446cc306b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:49,400 SpawnPoolWorker-40 DEBUG    upload finished in 1.578494s, attributes: file_id=29e0e2780ad3
2025-05-26 19:25:49,400 SpawnPoolWorker-40 DEBUG    upload finished in 1.57901s, attributes: file_id=29e0e2780ad3
2025-05-26 19:25:49,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:49,734 SpawnPoolWorker-37 DEBUG    upload finished in 1.407393s, attributes: file_id=d3cc26c31837
2025-05-26 19:25:49,734 SpawnPoolWorker-37 DEBUG    upload finished in 1.408043s, attributes: file_id=d3cc26c31837
upload:  66%|██████▌   | 696/1056 [02:05<01:11,  5.01it/s]2025-05-26 19:25:49,736 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/80b71c877cd0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:49,901 SpawnPoolWorker-41 DEBUG    upload finished in 1.534727s, attributes: file_id=26aacae66562
2025-05-26 19:25:49,902 SpawnPoolWorker-41 DEBUG    upload finished in 1.535371s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:50,072 SpawnPoolWorker-38 DEBUG    upload finished in 1.497284s, attributes: file_id=c5f39657525c
2025-05-26 19:25:50,073 SpawnPoolWorker-38 DEBUG    upload finished in 1.497905s, attributes: file_id=c5f39657525c
upload:  66%|██████▌   | 698/1056 [02:05<01:07,  5.31it/s]2025-05-26 19:25:50,075 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5915225e2b1e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:50,237 SpawnPoolWorker-35 DEBUG    upload finished in 1.812752s, attributes: file_id=2f091014e8d7
2025-05-26 19:25:50,238 SpawnPoolWorker-35 DEBUG    upload finished in 1.813369s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:50,375 SpawnPoolWorker-39 DEBUG    upload finished in 1.355082s, attributes: file_id=1f8ce65c54b5
2025-05-26 19:25:50,376 SpawnPoolWorker-39 DEBUG    upload finished in 1.355861s, attributes: file_id=1f8ce65c54b5
upload:  66%|██████▋   | 700/1056 [02:05<01:01,  5.83it/s]2025-05-26 19:25:50,378 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5bff9f970545.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:50,522 SpawnPoolWorker-36 DEBUG    upload finished in 1.408432s, attributes: file_id=32f545c25a43
2025-05-26 19:25:50,522 SpawnPoolWorker-36 DEBUG    upload finished in 1.409035s, attributes: file_id=32f545c25a43
upload:  66%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:50,792 SpawnPoolWorker-40 DEBUG    upload finished in 1.390554s, attributes: file_id=837ffd873211
2025-05-26 19:25:50,793 SpawnPoolWorker-40 DEBUG    upload finished in 1.391122s, attributes: file_id=837ffd873211
upload:  66%|██████▋   | 702/1056 [02:06<01:08,  5.16it/s]2025-05-26 19:25:50,795 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/89dad04c13de.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:50,932 SpawnPoolWorker-34 DEBUG    upload finished in 1.577934s, attributes: file_id=397446cc306b
2025-05-26 19:25:50,932 SpawnPoolWorker-34 DEBUG    upload finished in 1.578459s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:51,297 SpawnPoolWorker-41 DEBUG    upload finished in 1.393196s, attributes: file_id=3057c7d22745
2025-05-26 19:25:51,297 SpawnPoolWorker-41 DEBUG    upload finished in 1.393864s, attributes: file_id=3057c7d22745
upload:  67%|██████▋   | 704/1056 [02:06<01:21,  4.31it/s]2025-05-26 19:25:51,299 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/58388b65f043.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:51,371 SpawnPoolWorker-37 DEBUG    upload finished in 1.635429s, attributes: file_id=80b71c877cd0
2025-05-26 19:25:51,371 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:51,994 SpawnPoolWorker-35 DEBUG    upload finished in 1.75374s, attributes: file_id=e7556cf44261
2025-05-26 19:25:51,994 SpawnPoolWorker-35 DEBUG    upload finished in 1.754608s, attributes: file_id=e7556cf44261
upload:  67%|██████▋   | 707/1056 [02:07<01:29,  3.91it/s]2025-05-26 19:25:51,997 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/52c59047136a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:52,123 SpawnPoolWorker-39 DEBUG    upload finished in 1.745577s, attributes: file_id=5bff9f970545
2025-05-26 19:25:52,124 Spawn

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:52,292 SpawnPoolWorker-36 DEBUG    upload finished in 1.767991s, attributes: file_id=9d0ddb7f22fd
2025-05-26 19:25:52,293 SpawnPoolWorker-36 DEBUG    upload finished in 1.770056s, attributes: file_id=9d0ddb7f22fd
upload:  67%|██████▋   | 709/1056 [02:07<01:12,  4.79it/s]2025-05-26 19:25:52,298 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/95a04c3dd59d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:52,539 SpawnPoolWorker-40 DEBUG    upload finished in 1.745621s, attributes: file_id=89dad04c13de
2025-05-26 19:25:52,540 SpawnPoolWorker-40 DEBUG    upload finished in 1.746191s, attributes: file_id=89dad04c13de
upload:  67%|██████▋   | 710/1056 [02:08<01:15,  4.57it/s]2025-05-26 19:25:52,542 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5ea248c6f940.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:52,762 SpawnPoolWorker-34 DEBUG    upload finished in 1.829458s, attributes: file_id=c630f32ed3a4
2025-05-26 19:25:52,763 SpawnPoolWorker-34 DEBUG    upload finished in 1.82995s, attributes: file_id=c630f32ed3a4
upload:  67%|██████▋   | 711/1056 [02:08<01:15,  4.54it/s]2025-05-26 19:25:52,765 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b0b12664fc31.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:52,901 SpawnPoolWorker-38 DEBUG    upload finished in 1.452825s, attributes: file_id=1ba6326b829a
2025-05-26 19:25:52,902 SpawnPoolWorker-38 DEBUG    upload finished in 1.453388s, attributes: file_id=1ba6326b829a
upload:  67%|██████▋   | 712/1056 [02:08<01:07,  5.08it/s]2025-05-26 19:2

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:53,010 SpawnPoolWorker-41 DEBUG    upload finished in 1.711609s, attributes: file_id=58388b65f043
2025-05-26 19:25:53,010 SpawnPoolWorker-41 DEBUG    upload finished in 1.712441s, attributes: file_id=58388b65f043
upload:  68%|██████▊   | 713/1056 [02:08<00:58,  5.85it/s]2025-05-26 19:25:53,013 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8784d56e1a38.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:53,098 SpawnPoolWorker-37 DEBUG    upload finished in 1.725879s, attributes: file_id=59e602562ecb
2025-05-26 19:25:53,099 SpawnPoolWorker-37 DEBUG    upload finished in 1.726444s, attributes: file_id=59e602562ecb
2025-05-26 19:25:53

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:53,681 SpawnPoolWorker-35 DEBUG    upload finished in 1.685202s, attributes: file_id=52c59047136a
2025-05-26 19:25:53,682 SpawnPoolWorker-35 DEBUG    upload finished in 1.685846s, attributes: file_id=52c59047136a
upload:  68%|██████▊   | 715/1056 [02:09<01:23,  4.07it/s]2025-05-26 19:25:53,684 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/74e692263842.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:53,868 SpawnPoolWorker-39 DEBUG    upload finished in 1.741694s, attributes: file_id=7231f5d9eda3
2025-05-26 19:25:53,868 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:54,197 SpawnPoolWorker-36 DEBUG    upload finished in 1.899567s, attributes: file_id=95a04c3dd59d
2025-05-26 19:25:54,197 SpawnPoolWorker-36 DEBUG    upload finished in 1.900732s, attributes: file_id=95a04c3dd59d
upload:  68%|██████▊   | 717/1056 [02:09<01:27,  3.89it/s]2025-05-26 19:25:54,202 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/16127e212826.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:54,320 SpawnPoolWorker-40 DEBUG    upload finished in 1.778482s, attributes: file_id=5ea248c6f940
2025-05-26 19:25:54,320 SpawnPoolWorker-40 DEBUG    upload finished in 1.778972s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:54,431 SpawnPoolWorker-34 DEBUG    upload finished in 1.667343s, attributes: file_id=b0b12664fc31
2025-05-26 19:25:54,431 SpawnPoolWorker-34 DEBUG    upload finished in 1.66782s, attributes: file_id=b0b12664fc31
upload:  68%|██████▊   | 719/1056 [02:09<01:04,  5.26it/s]2025-05-26 19:25:54,434 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a45daba4dace.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:54,577 SpawnPoolWorker-41 DEBUG    upload finished in 1.565126s, attributes: file_id=8784d56e1a38
2025-05-26 19:25:54,578 SpawnPoolWorker-41 DEBUG    upload finished in 1.565753s, attributes: file_id=8784d56e1a38
upload:  68%|██████▊   | 720/1056 [02:10<00:59,  5.63it/s]2025-05-26 19:2

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:54,635 SpawnPoolWorker-38 DEBUG    upload finished in 1.731941s, attributes: file_id=d81c3daaf06e
2025-05-26 19:25:54,636 SpawnPoolWorker-38 DEBUG    upload finished in 1.7327s, attributes: file_id=d81c3daaf06e
2025-05-26 19:25:54,638 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9fc49d523567.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:54,968 SpawnPoolWorker-37 DEBUG    upload finished in 1.868127s, attributes: file_id=61e9b333deaf
2025-05-26 19:25:54,969 SpawnPoolWorker-37 DEBUG    upload finished in 1.868822s, attributes: file_id=61e9b333deaf
upload:  68%|██████▊   | 722/1056 [02:10<01:01,  5.39it/s]2025-05-26 19:25:54,972 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3f6f92f6bb96.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:55,346 SpawnPoolWorker-35 DEBUG    upload finished in 1.662852s, attributes: file_id=74e692263842
2025-05-26 19:25:55,347 SpawnPoolWorker-35 DEBUG    upload finished in 1.663532s, attributes: file_id=74e692263842
upload:  68%|██████▊   | 723/1056 [02:10<01:17,  4.30it/s]2025-05-26 19:25:55,350 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3b148abb26f3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:55,438 SpawnPoolWorker-39 DEBUG    upload finished in 1.568952s, attributes: file_id=2b78d99e26d3
2025-05-26 19:25:55,439 SpawnPoolWorker-39 DEBUG    upload finished in 1.569564s, attributes: file_id=2b78d99e26d3
2025-05-26 19:25:55

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:55,922 SpawnPoolWorker-36 DEBUG    upload finished in 1.721408s, attributes: file_id=16127e212826
2025-05-26 19:25:55,923 SpawnPoolWorker-36 DEBUG    upload finished in 1.72282s, attributes: file_id=16127e212826
upload:  69%|██████▊   | 725/1056 [02:11<01:24,  3.92it/s]2025-05-26 19:25:55,927 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/115039929c28.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:56,238 SpawnPoolWorker-40 DEBUG    upload finished in 1.917028s, attributes: file_id=fbb925993f57
2025-05-26 19:25:56,239 SpawnPoolWorker-40 DEBUG    upload finished in 1.917676s, attributes: file_id=fbb925993f57
upload:  69%|██████▉   | 726/1056 [02:11<01:28,  3.72it/s]2025-05-26 19:25:56,241 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7284e1de9bb8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:56,269 SpawnPoolWorker-41 DEBUG    upload finished in 1.690284s, attributes: file_id=37af029ecc77
2025-05-26 19:25:56,269 SpawnPoolWorker-41 DEBUG    upload finished in 1.69074s, attributes: file_id=37af029ecc77
2025-05-26 19:25:56,271 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
2025-05-26 19:25:56,649 SpawnPoolWorker-37 DEBUG    upload finished in 1.678111s, attributes: file_id=3f6f92f6bb96
2025-05-26 19:25:56,649 SpawnPoolWorker-37 DEBUG    upload finished in 1.678728s, attributes: file_id=3f6f92f6bb96
upload:  69%|██████▉   | 730/1056 [02:12<00:58,  5.62it/s]2025-05-26 19:25:56,651 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a307861850d2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:57,001 SpawnPoolWorker-39 DEBUG    upload finished in 1.560685s, attributes: file_id=9685822e72ac
2025-05-26 19:25:57,001 SpawnPoolWorker-39 DEBUG    upload finished in 1.561258s, attributes: file_id=9685822e72ac
upload:  69%|██████▉   | 731/1056 [02:12<01:09,  4.65it/s]2025-05-26 19:25:57,004 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/dcd3d342667c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:57,063 SpawnPoolWorker-35 DEBUG    upload finished in 1.714321s, attributes: file_id=3b148abb26f3
2025-05-26 19:25:57,063 SpawnPoolWorker-35 DEBUG    upload finished in 1.715063s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:57,408 SpawnPoolWorker-36 DEBUG    upload finished in 1.482934s, attributes: file_id=115039929c28
2025-05-26 19:25:57,409 SpawnPoolWorker-36 DEBUG    upload finished in 1.483933s, attributes: file_id=115039929c28
upload:  69%|██████▉   | 733/1056 [02:12<01:08,  4.74it/s]2025-05-26 19:25:57,413 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e9bec0f15f18.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:57,595 SpawnPoolWorker-41 DEBUG    upload finished in 1.324431s, attributes: file_id=986b355da2bf
2025-05-26 19:25:57,596 SpawnPoolWorker-41 DEBUG    upload finished in 1.325167s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:57,689 SpawnPoolWorker-40 DEBUG    upload finished in 1.449042s, attributes: file_id=7284e1de9bb8
2025-05-26 19:25:57,690 SpawnPoolWorker-40 DEBUG    upload finished in 1.449676s, attributes: file_id=7284e1de9bb8
2025-05-26 19:25:57,692 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/07ebccce8756.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:57,807 SpawnPoolWorker-38 DEBUG    upload finished in 1.481635s, attributes: file_id=4ad992878e4d
2025-05-26 19:25:57,808 SpawnPoolWorker-38 DEBUG    upload finished in 1.482404s, attributes: file_id=4ad992878e4d
upload:  70%|██████▉   | 736/1056 [02:13<00:53,  5.95it/s]2025-05-26 19:25:57

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:58,032 SpawnPoolWorker-34 DEBUG    upload finished in 1.606427s, attributes: file_id=05d28d68a728
2025-05-26 19:25:58,033 SpawnPoolWorker-37 DEBUG    upload finished in 1.382365s, attributes: file_id=a307861850d2
2025-05-26 19:25:58,033 SpawnPoolWorker-34 DEBUG    upload finished in 1.607348s, attributes: file_id=05d28d68a728
2025-05-26 19:25:58,033 SpawnPoolWorker-37 DEBUG    upload finished in 1.382948s, attributes: file_id=a307861850d2
upload:  70%|██████▉   | 737/1056 [02:13<00:57,  5.55it/s]2025-05-26 19:25:58,036 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/16765ae985f6.json not detected as batch file data
2025-05-26 19:25:58,036 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/cb1161a3156e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:58,485 SpawnPoolWorker-39 DEBUG    upload finished in 1.482246s, attributes: file_id=dcd3d342667c
2025-05-26 19:25:58,486 SpawnPoolWorker-39 DEBUG    upload finished in 1.483279s, attributes: file_id=dcd3d342667c
upload:  70%|██████▉   | 739/1056 [02:13<01:02,  5.06it/s]2025-05-26 19:25:58,491 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ff0dd8a407d4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:58,631 SpawnPoolWorker-35 DEBUG    upload finished in 1.566105s, attributes: file_id=986f7d5d8ec7
2025-05-26 19:25:58,631 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:58,950 SpawnPoolWorker-36 DEBUG    upload finished in 1.538832s, attributes: file_id=e9bec0f15f18
2025-05-26 19:25:58,951 SpawnPoolWorker-36 DEBUG    upload finished in 1.539831s, attributes: file_id=e9bec0f15f18
upload:  70%|███████   | 741/1056 [02:14<01:08,  4.60it/s]2025-05-26 19:25:58,954 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/69419522689e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:59,047 SpawnPoolWorker-41 DEBUG    upload finished in 1.44938s, attributes: file_id=032ab8a11c7c
2025-05-26 19:25:59,047 SpawnPoolWorker-41 DEBUG    upload finished in 1.449995s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:25:59,275 SpawnPoolWorker-38 DEBUG    upload finished in 1.465228s, attributes: file_id=0a5b4f654033
2025-05-26 19:25:59,275 SpawnPoolWorker-38 DEBUG    upload finished in 1.465903s, attributes: file_id=0a5b4f654033
upload:  70%|███████   | 744/1056 [02:14<00:51,  6.09it/s]2025-05-26 19:25:59,277 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/084057ae8b52.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:25:59,574 SpawnPoolWorker-37 DEBUG    upload finished in 1.53857s, attributes: file_id=16765ae985f6
2025-05-26 19:25:59,574 SpawnPoolWorker-37 DEBUG    upload finished in 1.53914s, attributes: file_id=16765ae985f6
upload:  71%|███████   | 745/1056 [02:15<01:01,  5.08it/s]2025-05-26 19:25:59,576 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8cc34ffd2ce9.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:25:59,670 SpawnPoolWorker-34 DEBUG    upload finished in 1.635077s, attributes: file_id=cb1161a3156e
2025-05-26 19:25:59,671 SpawnPoolWorker-34 DEBUG    upload finished in 1.635766s, a

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:25:59,810 SpawnPoolWorker-39 DEBUG    upload finished in 1.321362s, attributes: file_id=ff0dd8a407d4
2025-05-26 19:25:59,811 SpawnPoolWorker-39 DEBUG    upload finished in 1.322082s, attributes: file_id=ff0dd8a407d4
2025-05-26 19:25:59,812 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1713d160caa0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:00,293 SpawnPoolWorker-36 DEBUG    upload finished in 1.340687s, attributes: file_id=69419522689e
2025-05-26 19:26:00,294 SpawnPoolWorker-36 DEBUG    upload finished in 1.341585s, attributes: file_id=69419522689e
upload:  71%|███████   | 749/1056 [02:15<01:00,  5.10it/s]2025-05-26 19:26:00,297 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9d24694ae898.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:00,332 SpawnPoolWorker-41 DEBUG    upload finished in 1.283297s, attributes: file_id=01a14730d0fc
2025-05-26 19:26:00,333 SpawnPoolWorker-41 DEBUG    upload finished in 1.283996s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:00,639 SpawnPoolWorker-38 DEBUG    upload finished in 1.362419s, attributes: file_id=084057ae8b52
2025-05-26 19:26:00,639 SpawnPoolWorker-38 DEBUG    upload finished in 1.363187s, attributes: file_id=084057ae8b52
upload:  71%|███████   | 752/1056 [02:16<00:48,  6.21it/s]2025-05-26 19:26:00,642 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7a7cd88348fc.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:01,015 SpawnPoolWorker-37 DEBUG    upload finished in 1.439317s, attributes: file_id=8cc34ffd2ce9
2025-05-26 19:26:01,015 SpawnPoolWorker-37 DEBUG    upload finished in 1.439881s, attributes: file_id=8cc34ffd2ce9
upload:  71%|███████▏  | 753/1056 [02:16<01:03,  4.80it/s]2025-05-26 19:26:01,017 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5451e1ba3236.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:01,196 SpawnPoolWorker-34 DEBUG    upload finished in 1.524196s, attributes: file_id=d267bd405013
2025-05-26 19:26:01,197 SpawnPoolWorker-34 DEBUG    upload finished in 1.524752s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:01,338 SpawnPoolWorker-35 DEBUG    upload finished in 1.576068s, attributes: file_id=9f78adaef5b1
2025-05-26 19:26:01,338 SpawnPoolWorker-39 DEBUG    upload finished in 1.526012s, attributes: file_id=1713d160caa0
2025-05-26 19:26:01,338 SpawnPoolWorker-39 DEBUG    upload finished in 1.526577s, attributes: file_id=1713d160caa0
2025-05-26 19:26:01,338 SpawnPoolWorker-35 DEBUG    upload finished in 1.576765s, attributes: file_id=9f78adaef5b1
upload:  71%|███████▏  | 755/1056 [02:16<00:56,  5.36it/s]2025-05-26 19:26:01,341 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6c9783a8d2c5.json not detected as batch file data
2025-05-26 19:26:01,341 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/90877ff19593.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: ht

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:01,812 SpawnPoolWorker-41 DEBUG    upload finished in 1.477353s, attributes: file_id=65c217cbdef1
2025-05-26 19:26:01,812 SpawnPoolWorker-41 DEBUG    upload finished in 1.477903s, attributes: file_id=65c217cbdef1
upload:  72%|███████▏  | 757/1056 [02:17<01:02,  4.81it/s]2025-05-26 19:26:01,814 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5fb71abe2003.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:01,939 SpawnPoolWorker-36 DEBUG    upload finished in 1.643372s, attributes: file_id=9d24694ae898
2025-05-26 19:26:01,940 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:02,090 SpawnPoolWorker-40 DEBUG    upload finished in 1.620527s, attributes: file_id=27d93086bf59
2025-05-26 19:26:02,090 SpawnPoolWorker-40 DEBUG    upload finished in 1.621286s, attributes: file_id=27d93086bf59
upload:  72%|███████▏  | 759/1056 [02:17<00:53,  5.57it/s]2025-05-26 19:26:02,093 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/86d23bcc8498.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:02,242 SpawnPoolWorker-38 DEBUG    upload finished in 1.601031s, attributes: file_id=7a7cd88348fc
2025-05-26 19:26:02,242 SpawnPoolWorker-38 DEBUG    upload finished in 1.601551s, attributes: file_id=7a7cd88348fc
upload:  72%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:02,583 SpawnPoolWorker-37 DEBUG    upload finished in 1.566348s, attributes: file_id=5451e1ba3236
2025-05-26 19:26:02,583 SpawnPoolWorker-37 DEBUG    upload finished in 1.566853s, attributes: file_id=5451e1ba3236
upload:  72%|███████▏  | 761/1056 [02:18<01:04,  4.58it/s]2025-05-26 19:26:02,588 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/42bc03ea34f7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:02,775 SpawnPoolWorker-34 DEBUG    upload finished in 1.576826s, attributes: file_id=fa2b12bd9cbc
2025-05-26 19:26:02,776 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:02,895 SpawnPoolWorker-39 DEBUG    upload finished in 1.555134s, attributes: file_id=6c9783a8d2c5
2025-05-26 19:26:02,896 SpawnPoolWorker-39 DEBUG    upload finished in 1.555807s, attributes: file_id=6c9783a8d2c5
upload:  72%|███████▏  | 763/1056 [02:18<00:54,  5.40it/s]2025-05-26 19:26:02,899 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a01f088658de.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:02,990 SpawnPoolWorker-35 DEBUG    upload finished in 1.650534s, attributes: file_id=90877ff19593
2025-05-26 19:26:02,991 SpawnPoolWorker-35 DEBUG    upload finished in 1.651231s, attributes: file_id=90877ff19593
2025-05-26 19:26:02

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:03,257 SpawnPoolWorker-41 DEBUG    upload finished in 1.443866s, attributes: file_id=5fb71abe2003
2025-05-26 19:26:03,257 SpawnPoolWorker-41 DEBUG    upload finished in 1.444435s, attributes: file_id=5fb71abe2003
upload:  72%|███████▏  | 765/1056 [02:18<00:53,  5.46it/s]2025-05-26 19:26:03,259 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2a0b1657b490.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:03,569 SpawnPoolWorker-40 DEBUG    upload finished in 1.477521s, attributes: file_id=86d23bcc8498
2025-05-26 19:26:03,569 SpawnPoolWorker-40 DEBUG    upload finished in 1.47839s, attributes: file_id=86d23bcc8498
upload:  73%|███████▎  | 766/1056 [02:19<01:02,  4.66it/s]2025-05-26 19:26:03,573 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9fb9ed894fa3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:03,741 SpawnPoolWorker-38 DEBUG    upload finished in 1.497508s, attributes: file_id=03c32c10a58d
2025-05-26 19:26:03,741 SpawnPoolWorker-38 DEBUG    upload finished in 1.498308s, attributes: file_id=03c32c10a58d
upload:  73%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:03,807 SpawnPoolWorker-36 DEBUG    upload finished in 1.865079s, attributes: file_id=4461abda905c
2025-05-26 19:26:03,808 SpawnPoolWorker-36 DEBUG    upload finished in 1.867058s, attributes: file_id=4461abda905c
2025-05-26 19:26:03,811 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4af1b3ff6247.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:03,924 SpawnPoolWorker-37 DEBUG    upload finished in 1.336332s, attributes: file_id=42bc03ea34f7
2025-05-26 19:26:03,924 SpawnPoolWorker-37 DEBUG    upload finished in 1.337006s, attributes: file_id=42bc03ea34f7
upload:  73%|███████▎  | 769/1056 [02:19<00:44,  6.41it/s]2025-05-26 19:26:03

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:04,290 SpawnPoolWorker-34 DEBUG    upload finished in 1.512794s, attributes: file_id=730e4d215721
2025-05-26 19:26:04,290 SpawnPoolWorker-34 DEBUG    upload finished in 1.513335s, attributes: file_id=730e4d215721
upload:  73%|███████▎  | 770/1056 [02:19<00:58,  4.88it/s]2025-05-26 19:26:04,292 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/78189fca4004.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:04,460 SpawnPoolWorker-39 DEBUG    upload finished in 1.561984s, attributes: file_id=a01f088658de
2025-05-26 19:26:04,461 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:04,566 SpawnPoolWorker-35 DEBUG    upload finished in 1.573577s, attributes: file_id=cc44398065d2
2025-05-26 19:26:04,566 SpawnPoolWorker-35 DEBUG    upload finished in 1.574352s, attributes: file_id=cc44398065d2
upload:  73%|███████▎  | 772/1056 [02:20<00:48,  5.80it/s]2025-05-26 19:26:04,570 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b471ab186e84.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:05,025 SpawnPoolWorker-41 DEBUG    upload finished in 1.766141s, attributes: file_id=2a0b1657b490
2025-05-26 19:26:05,025 SpawnPoolWorker-41 DEBUG    upload finished in 1.766836s, attributes: file_id=2a0b1657b490
upload:  73%|███████▎  | 773/1056 [02:20<01:10,  3.99it/s]2025-05-26 19:26:05,028 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6c9ffe0b988f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:05,162 SpawnPoolWorker-40 DEBUG    upload finished in 1.588887s, attributes: file_id=9fb9ed894fa3
2025-05-26 19:26:05,165 SpawnPoolWorker-40 DEBUG    upload finished in 1.592185s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:05,360 SpawnPoolWorker-37 DEBUG    upload finished in 1.434201s, attributes: file_id=4ba829eaf35b
upload:  73%|███████▎  | 776/1056 [02:20<00:46,  6.03it/s]2025-05-26 19:26:05,363 SpawnPoolWorker-37 DEBUG    upload finished in 1.437626s, attributes: file_id=4ba829eaf35b
2025-05-26 19:26:05,365 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1ea7a1179d95.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:05,399 SpawnPoolWorker-38 DEBUG    upload finished in 1.655022s, attributes: file_id=8bbf049b542e
2025-05-26 19:26:05,399 SpawnPoolWorker-38 DEBUG    upload finished in 1.65555s, attributes: file_id=8bbf049b542e
2025-05-26 19:26:05,400 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:05,874 SpawnPoolWorker-34 DEBUG    upload finished in 1.582339s, attributes: file_id=78189fca4004
2025-05-26 19:26:05,875 SpawnPoolWorker-34 DEBUG    upload finished in 1.583852s, attributes: file_id=78189fca4004
upload:  74%|███████▎  | 778/1056 [02:21<00:55,  4.97it/s]2025-05-26 19:26:05,878 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3f17feed24ac.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:05,919 SpawnPoolWorker-35 DEBUG    upload finished in 1.349582s, attribut

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:06,582 SpawnPoolWorker-41 DEBUG    upload finished in 1.55449s, attributes: file_id=6c9ffe0b988f
2025-05-26 19:26:06,587 SpawnPoolWorker-41 DEBUG    upload finished in 1.559844s, attributes: file_id=6c9ffe0b988f
upload:  74%|███████▍  | 781/1056 [02:22<01:06,  4.16it/s]2025-05-26 19:26:06,591 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/cd5c108a236c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:06,746 SpawnPoolWorker-40 DEBUG    upload finished in 1.580695s, attributes: file_id=8d9b1a142a34
2025-05-26 19:26:06,747 Spawn

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:06,793 SpawnPoolWorker-36 DEBUG    upload finished in 1.580353s, attributes: file_id=bbf468830b38
2025-05-26 19:26:06,793 SpawnPoolWorker-36 DEBUG    upload finished in 1.581706s, attributes: file_id=bbf468830b38
2025-05-26 19:26:06,794 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/888fb21ca63a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:06,870 SpawnPoolWorker-38 DEBUG    upload finished in 1.470481s, attributes: file_id=198cf2d90161
2025-05-26 19:26:06,870 SpawnPoolWorker-38 DEBUG    upload finished in 1.470887s, attributes: file_id=198cf2d90161
upload:  74%|███████▍  | 784/1056 [02:22<00:43,  6.27it/s]2025-05-26 19:26:06,873 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:07,338 SpawnPoolWorker-39 DEBUG    upload finished in 1.339745s, attributes: file_id=1d0b61e2d2df
2025-05-26 19:26:07,338 SpawnPoolWorker-39 DEBUG    upload finished in 1.340404s, attributes: file_id=1d0b61e2d2df
upload:  74%|███████▍  | 786/1056 [02:22<00:51,  5.21it/s]2025-05-26 19:26:07,340 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c840eb7d53d8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:08,107 SpawnPoolWorker-41 DEBUG    upload finished in 1.517744s, attributes: file_id=cd5c108a236c
2025-05-26 19:26:08,108 SpawnPoolWorker-41 DEBUG    upload finished in 1.518687s, attributes: file_id=cd5c108a236c
upload:  75%|███████▍  | 789/1056 [02:23<01:06,  4.00it/s]2025-05-26 19:26:08,111 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/33ebfd45f56e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:08,277 SpawnPoolWorker-40 DEBUG    upload finished in 1.529541s, attributes: file_id=d0039da21aa1
2025-05-26 19:26:08,278 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:08,418 SpawnPoolWorker-38 DEBUG    upload finished in 1.546278s, attributes: file_id=c92489bb1018
2025-05-26 19:26:08,418 SpawnPoolWorker-38 DEBUG    upload finished in 1.546777s, attributes: file_id=c92489bb1018
upload:  75%|███████▍  | 791/1056 [02:23<00:54,  4.82it/s]2025-05-26 19:26:08,420 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ac4e76da28b5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:08,618 SpawnPoolWorker-36 DEBUG    upload finished in 1.823899s, attributes: file_id=888fb21ca63a
2025-05-26 19:26:08,618 SpawnPoolWorker-36 DEBUG    upload finished in 1.824375s, attributes: file_id=888fb21ca63a
upload:  75%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:08,622 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/dae7750f0fa0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:08,713 SpawnPoolWorker-37 DEBUG    upload finished in 1.642466s, attributes: file_id=a2b747f02cff
2025-05-26 19:26:08,713 SpawnPoolWorker-37 DEBUG    upload finished in 1.643043s, attributes: file_id=a2b747f02cff
2025-05-26 19:26:08,715 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/50cb06238bc2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:08,837 SpawnPoolWorker-35 DEBUG    upload finished in 1.464849s, attributes: file_id=6b8d2097201f
2025-05-26 19:26:08,837 SpawnPoolWorker-35 DEBUG    upload finished in 1.465315s, attributes: file_id=6b8d2097201f
upload:  75%|███████▌  | 794/1056 [02:24<00:42,  6.11it/s]2025-05-26 19:26:08,839 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3cee685d4c29.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:08,925 SpawnPoolWorker-39 DEBUG    upload finished in 1.585816s, attributes: file_id=c840eb7d53d8
2025-05-26 19:26:08,926 SpawnPoolWorker-39 DEBUG    upload finished in 1.586334s, attributes: file_id=c840eb7d53d8
2025-05-26 19:26:08,927 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:09,061 SpawnPoolWorker-34 DEBUG    upload finished in 1.552708s, attributes: file_id=b53a1074a950
2025-05-26 19:26:09,061 SpawnPoolWorker-34 DEBUG    upload finished in 1.553644s, attributes: file_id=b53a1074a950
upload:  75%|███████▌  | 796/1056 [02:24<00:37,  6.95it/s]2025-05-26 19:26:09,063 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b5a3e3abfdbb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:09,596 SpawnPoolWorker-41 DEBUG    upload finished in 1.486931s, attributes: file_id=33ebfd45f56e
2025-05-26 19:26:09,597 SpawnPoolWorker-41 DEBUG    upload finished in 1.488034s, attributes: file_id=33ebfd45f56e
upload:  75%|███████▌  | 797/1056 [02:25<00:59,  4.38it/s]2025-05-26 19:26:09,599 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/939fefd52114.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:09,842 SpawnPoolWorker-40 DEBUG    upload finished in 1.560944s, attributes: file_id=ce9ca9ce462c
2025-05-26 19:26:09,842 SpawnPoolWorker-40 DEBUG    upload finished in 1.561489s, attributes: file_id=ce9ca9ce462c
upload:  76%|███████▌  | 798/1056 [02:25<00:59,  4.31it/s]2025-05-26 19:26:09,845 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ed83f2d51e5d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:10,113 SpawnPoolWorker-38 DEBUG    upload finished in 1.693416s, attributes: file_id=ac4e76da28b5
2025-05-26 19:26:10,113 SpawnPoolWorker-38 DEBUG    upload finished in 1.693892s, attributes: file_id=ac4e76da28b5
upload:  76%|███████▌  | 799/1056 [02:25<01:02,  4.14it/s]2025-05-26 19:26:10,116 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d561509199c7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:10,214 SpawnPoolWorker-37 DEBUG    upload finished in 1.500177s, attributes: file_id=50cb06238bc2
2025-05-26 19:26:10,215 SpawnPoolWorker-37 DEBUG    upload finished in 1.50083s, attributes: file_id=50cb06238bc2
upload:  76%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:10,343 SpawnPoolWorker-35 DEBUG    upload finished in 1.505094s, attributes: file_id=3cee685d4c29
2025-05-26 19:26:10,344 SpawnPoolWorker-35 DEBUG    upload finished in 1.505904s, attributes: file_id=3cee685d4c29
upload:  76%|███████▌  | 802/1056 [02:25<00:36,  6.92it/s]2025-05-26 19:26:10,347 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5c3d158dd18f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:10,364 SpawnPoolWorker-34 DEBUG    upload finished in 1.301563s, attributes: file_id=b5a3e3abfdbb
2025-05-26 19:26:10,364 SpawnPoolWorker-34 DEBUG    upload finished in 1.302069s, attributes: file_id=b5a3e3abfdbb
2025-05-26 19:26:10,366 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:10,590 SpawnPoolWorker-39 DEBUG    upload finished in 1.662833s, attributes: file_id=15bef4419202
2025-05-26 19:26:10,590 SpawnPoolWorker-39 DEBUG    upload finished in 1.66337s, attributes: file_id=15bef4419202
upload:  76%|███████▌  | 804/1056 [02:26<00:34,  7.34it/s]2025-05-26 19:26:10,593 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bc6ee0327487.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:11,272 SpawnPoolWorker-41 DEBUG    upload finished in 1.672864s, attributes: file_id=939fefd52114
2025-05-26 19:26:11,275 SpawnPoolWorker-41 DEBUG    upload finished in 1.676374s, attributes: file_id=939fefd52114
upload:  76%|███████▌  | 805/1056 [02:26<01:03,  3.94it/s]2025-05-26 19:26:11,278 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b3837b61cefb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:11,423 SpawnPoolWorker-40 DEBUG    upload finished in 1.578857s, attributes: file_id=ed83f2d51e5d
2025-05-26 19:26:11,423 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:11,822 SpawnPoolWorker-36 DEBUG    upload finished in 1.604144s, attributes: file_id=bcb99853ac0d
2025-05-26 19:26:11,823 SpawnPoolWorker-36 DEBUG    upload finished in 1.605177s, attributes: file_id=bcb99853ac0d
upload:  76%|███████▋  | 807/1056 [02:27<01:07,  3.68it/s]2025-05-26 19:26:11,827 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2b441ab258cb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:11,838 SpawnPoolWorker-38 DEBUG    upload finished in 1.722956s, attributes: file_id=d561509199c7
2025-05-26 19:26:11,838 SpawnPoolWorker-38 DEBUG    upload finished in 1.724159s, attributes: file_id=d561509199c7
2025-05-26 19:26:11,842 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:12,363 SpawnPoolWorker-39 DEBUG    upload finished in 1.771305s, attributes: file_id=bc6ee0327487
2025-05-26 19:26:12,363 SpawnPoolWorker-39 DEBUG    upload finished in 1.772137s, attributes: file_id=bc6ee0327487
upload:  77%|███████▋  | 812/1056 [02:27<00:39,  6.14it/s]2025-05-26 19:26:12,366 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f7716274595e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:12,865 SpawnPoolWorker-40 DEBUG    upload finished in 1.440339s, attributes: file_id=6c787abc4291
2025-05-26 19:26:12,866 SpawnPoolWorker-40 DEBUG    upload finished in 1.441602s, attributes: file_id=6c787abc4291
upload:  77%|███████▋  | 813/1056 [02:28<00:54,  4.44it/s]2025-05-26 19:26:12,869 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5c2b17e060ff.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:13,047 SpawnPoolWorker-41 DEBUG    upload finished in 1.770009s, attributes: file_id=b3837b61cefb
2025-05-26 19:26:13,048 SpawnPoolWorker-41 DEBUG    upload finished in 1.771442s, attributes: file_id=b3837b61cefb
upload:  77%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:13,227 SpawnPoolWorker-36 DEBUG    upload finished in 1.401451s, attributes: file_id=2b441ab258cb
2025-05-26 19:26:13,229 SpawnPoolWorker-36 DEBUG    upload finished in 1.403723s, attributes: file_id=2b441ab258cb
upload:  77%|███████▋  | 815/1056 [02:28<00:50,  4.80it/s]2025-05-26 19:26:13,233 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a8810caeab74.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:13,261 SpawnPoolWorker-38 DEBUG    upload finished in 1.419974s, attributes: file_id=81f5e27cbdb0
2025-05-26 19:26:13,262 SpawnPoolWorker-38 DEBUG    upload finished in 1.420765s, attributes: file_id=81f5e27cbdb0
2025-05-26 19:26:13

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:13,476 SpawnPoolWorker-37 DEBUG    upload finished in 1.463573s, attributes: file_id=9fbe44a56a82
2025-05-26 19:26:13,476 SpawnPoolWorker-37 DEBUG    upload finished in 1.46414s, attributes: file_id=9fbe44a56a82
upload:  77%|███████▋  | 818/1056 [02:28<00:34,  6.92it/s]2025-05-26 19:26:13,478 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/732f4fb1ec47.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:13,518 SpawnPoolWorker-35 DEBUG    upload finished in 1.570292s, attributes: file_id=1f81874fff59
2025-05-26 19:26:13,518 SpawnPoolWorker-35 DEBUG    upload finished in 1.5708s, attributes: file_id=1f81874fff59
2025-05-26 19:26:13,520 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/u

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:13,696 SpawnPoolWorker-39 DEBUG    upload finished in 1.331039s, attributes: file_id=f7716274595e
2025-05-26 19:26:13,697 SpawnPoolWorker-39 DEBUG    upload finished in 1.331829s, attributes: file_id=f7716274595e
upload:  78%|███████▊  | 820/1056 [02:29<00:30,  7.61it/s]2025-05-26 19:26:13,700 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9c273ea8268a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:14,304 SpawnPoolWorker-40 DEBUG    upload finished in 1.435647s, attributes: file_id=5c2b17e060ff
2025-05-26 19:26:14,304 SpawnPoolWorker-40 DEBUG    upload finished in 1.436336s, attributes: file_id=5c2b17e060ff
upload:  78%|███████▊  | 821/1056 [02:29<00:55,  4.27it/s]2025-05-26 19:26:14,306 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8d038b76dd2d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:14,333 SpawnPoolWorker-41 DEBUG    upload finished in 1.279895s, attributes: file_id=442218c526ae
2025-05-26 19:26:14,333 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:14,875 SpawnPoolWorker-36 DEBUG    upload finished in 1.643096s, attributes: file_id=a8810caeab74
2025-05-26 19:26:14,875 SpawnPoolWorker-36 DEBUG    upload finished in 1.64378s, attributes: file_id=a8810caeab74
upload:  78%|███████▊  | 823/1056 [02:30<00:59,  3.94it/s]2025-05-26 19:26:14,877 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9dcd7e2e40a4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:15,002 SpawnPoolWorker-38 DEBUG    upload finished in 1.739298s, attributes: file_id=541de4f8b911
2025-05-26 19:26:15,003 SpawnPoolWorker-38 DEBUG    upload finished in 1.740466s, 

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:15,195 SpawnPoolWorker-39 DEBUG    upload finished in 1.496351s, attributes: file_id=9c273ea8268a
2025-05-26 19:26:15,196 SpawnPoolWorker-39 DEBUG    upload finished in 1.496954s, attributes: file_id=9c273ea8268a
upload:  78%|███████▊  | 826/1056 [02:30<00:40,  5.64it/s]2025-05-26 19:26:15,199 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f8d72333e5ab.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:15,284 SpawnPoolWorker-37 DEBUG    upload finished in 1.806617s, attributes: file_id=732f4fb1ec47
2025-05-26 19:26:15,284 SpawnPoolWorker-37 DEBUG    upload finished in 1.807051s, attributes: file_id=732f4fb1ec47
2025-05-26 19:26:15

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:15,584 SpawnPoolWorker-40 DEBUG    upload finished in 1.278316s, attributes: file_id=8d038b76dd2d
2025-05-26 19:26:15,584 SpawnPoolWorker-40 DEBUG    upload finished in 1.278836s, attributes: file_id=8d038b76dd2d
upload:  79%|███████▊  | 829/1056 [02:31<00:36,  6.22it/s]2025-05-26 19:26:15,586 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8a78498fca58.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:15,935 SpawnPoolWorker-41 DEBUG    upload finished in 1.599922s, attributes: file_id=4c0b74369bd1
2025-05-26 19:26:15,935 SpawnPoolWorker-41 DEBUG    upload finished in 1.600617s, attributes: file_id=4c0b74369bd1
upload:  79%|███████▊  | 830/1056 [02:31<00:45,  4.92it/s]2025-05-26 19:26:15,939 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9ef8039c0e72.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:16,491 SpawnPoolWorker-36 DEBUG    upload finished in 1.614901s, attributes: file_id=9dcd7e2e40a4
2025-05-26 19:26:16,492 SpawnPoolWorker-36 DEBUG    upload finished in 1.61553s, attributes: file_id=9dcd7e2e40a4
upload:  79%|███████▊  | 831/1056 [02:31<01:04,  3.47it/s]2025-05-26 19:26:16,493 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/07d64ee232e8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:16,667 SpawnPoolWorker-34 DEBUG    upload finished in 1.621005s, attributes: file_id=d62e7bb843b4
2025-05-26 19:26:16,667 SpawnPoolWorker-34 DEBUG    upload finished in 1.621558s, attributes: file_id=d62e7bb843b4
upload:  79%|███████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:16,781 SpawnPoolWorker-38 DEBUG    upload finished in 1.775182s, attributes: file_id=b779c1d4aa67
2025-05-26 19:26:16,781 SpawnPoolWorker-38 DEBUG    upload finished in 1.775881s, attributes: file_id=b779c1d4aa67
upload:  79%|███████▉  | 833/1056 [02:32<00:49,  4.54it/s]2025-05-26 19:26:16,783 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8045ccf45870.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:16,840 SpawnPoolWorker-37 DEBUG    upload finished in 1.554367s, attributes: file_id=de24a8a5b4c1
2025-05-26 19:26:16,841 SpawnPoolWorker-37 DEBUG    upload finished in 1.555501s, attributes: file_id=de24a8a5b4c1
2025-05-26 19:26:16

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:17,017 SpawnPoolWorker-39 DEBUG    upload finished in 1.818124s, attributes: file_id=f8d72333e5ab
2025-05-26 19:26:17,017 SpawnPoolWorker-39 DEBUG    upload finished in 1.819241s, attributes: file_id=f8d72333e5ab
2025-05-26 19:26:17,020 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/20c60d149b66.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Remov

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:17,440 SpawnPoolWorker-41 DEBUG    upload finished in 1.503097s, attributes: file_id=9ef8039c0e72
2025-05-26 19:26:17,441 SpawnPoolWorker-41 DEBUG    upload finished in 1.503987s, attributes: file_id=9ef8039c0e72
upload:  79%|███████▉  | 838/1056 [02:32<00:35,  6.11it/s]2025-05-26 19:26:17,444 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ff15cc6e9193.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:17,996 SpawnPoolWorker-36 DEBUG    upload finished in 1.502059s, attributes: file_id=07d64ee232e8
2025-05-26 19:26:17,997 SpawnPoolWorker-36 DEBUG    upload finished in 1.504123s, attributes: file_id=07d64ee232e8
upload:  79%|███████▉  | 839/1056 [02:33<00:51,  4.22it/s]2025-05-26 19:26:18,001 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0b0125c395bf.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:18,192 SpawnPoolWorker-37 DEBUG    upload finished in 1.348767s, attributes: file_id=012d89579d81
2025-05-26 19:26:18,194 SpawnPoolWorker-37 DEBUG    upload finished in 1.351394s, attributes: file_id=012d89579d81
upload:  80%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:18,261 SpawnPoolWorker-38 DEBUG    upload finished in 1.479054s, attributes: file_id=8045ccf45870
2025-05-26 19:26:18,262 SpawnPoolWorker-38 DEBUG    upload finished in 1.479506s, attributes: file_id=8045ccf45870
2025-05-26 19:26:18,264 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/99085b75b862.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:18,282 SpawnPoolWorker-40 DEBUG    upload finished in 1.300034s, attributes: file_id=1e5b1d668

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:18,481 SpawnPoolWorker-39 DEBUG    upload finished in 1.461305s, attributes: file_id=20c60d149b66
2025-05-26 19:26:18,481 SpawnPoolWorker-39 DEBUG    upload finished in 1.462065s, attributes: file_id=20c60d149b66
2025-05-26 19:26:18,485 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e3e7210685e7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:18,572 SpawnPoolWorker-35 DEBUG    upload finished in 1.645953s, attributes: file_id=5365aa7b4df8
2025-05-26 19:26:18,572 SpawnPoolWorker-35 DEBUG    upload finished in 1.646633s, attributes: file_id=5365aa7b4df8
upload:  80%|████████  | 845/1056 [02:34<00:28,  7.31it/s]2025-05-26 19:26:18

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:18,981 SpawnPoolWorker-41 DEBUG    upload finished in 1.538008s, attributes: file_id=ff15cc6e9193
2025-05-26 19:26:18,981 SpawnPoolWorker-41 DEBUG    upload finished in 1.538691s, attributes: file_id=ff15cc6e9193
2025-05-26 19:26:18,983 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d85d12b0452b.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:19,485 SpawnPoolWorker-36 DEBUG    upload finished in 1.485557s, attributes: file_id=0b0125c395bf
2025-05-26 19:26:19,486 SpawnPoolWorker-36 DEBUG    upload finished in 1.486164s, attributes: file_id=0b0125c395bf
upload:  80%|████████  | 847/1056 [02:34<00:49,  4.25it/s]2025-05-26 19:26:19,487 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2a3d94e6c469.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:19,766 SpawnPoolWorker-40 DEBUG    upload finished in 1.483192s, attributes: file_id=35bf34380c71
2025-05-26 19:26:19,766 SpawnPoolWorker-40 DEBUG    upload finished in 1.483732s, attributes: file_id=35bf34380c71
upload:  80%|████████  | 848/1056 [02:35<00:50,  4.11it/s]2025-05-26 19:26:19,769 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9ad51f537c01.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:19,915 SpawnPoolWorker-37 DEBUG    upload finished in 1.718255s, attributes: file_id=b0ad568ef9f7
2025-05-26 19:26:19,918 SpawnPoolWorker-37 DEBUG    upload finished in 1.721836s, attributes: file_id=b0ad568ef9f7
upload:  80%|████████  | 849/1056 [02:35<00:46,  4.45it/s]2025-05-26 19:

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:20,073 SpawnPoolWorker-34 DEBUG    upload finished in 1.775682s, attributes: file_id=98a9c17583cf
2025-05-26 19:26:20,074 SpawnPoolWorker-34 DEBUG    upload finished in 1.776971s, attributes: file_id=98a9c17583cf
upload:  80%|████████  | 850/1056 [02:35<00:43,  4.78it/s]2025-05-26 19:26:20,078 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/10e70a0da8a4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:20,198 SpawnPoolWorker-35 DEBUG    upload finished in 1.624216s, attributes: file_id=8d3bedb85186
2025-05-26 19:26:20,198 SpawnPoolWorker-35 DEBUG    upload finished in 1.624705s, attributes: file_id=8d3bedb85186
upload:  81%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:20,307 SpawnPoolWorker-39 DEBUG    upload finished in 1.823996s, attributes: file_id=e3e7210685e7
2025-05-26 19:26:20,308 SpawnPoolWorker-39 DEBUG    upload finished in 1.824966s, attributes: file_id=e3e7210685e7
upload:  81%|████████  | 852/1056 [02:35<00:34,  5.95it/s]2025-05-26 19:26:20,311 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ed3aa19003e8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:20,500 SpawnPoolWorker-38 DEBUG    upload finished in 2.237539s, attributes: file_id=99085b75b862
2025-05-26 19:26:20,501 SpawnPoolWorker-41 DEBUG    upload finished in 1.518495s, attributes: file_id=d85d12b0452b
2025-05-26 19:26:20

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:21,011 SpawnPoolWorker-36 DEBUG    upload finished in 1.524708s, attributes: file_id=2a3d94e6c469
2025-05-26 19:26:21,012 SpawnPoolWorker-36 DEBUG    upload finished in 1.525295s, attributes: file_id=2a3d94e6c469
upload:  81%|████████  | 855/1056 [02:36<00:42,  4.76it/s]2025-05-26 19:26:21,014 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a2f0fce650e9.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:21,256 SpawnPoolWorker-40 DEBUG    upload finished in 1.487766s, attributes: file_id=9ad51f537c01
2025-05-26 19:26:21,257 SpawnPoolWorker-40 DEBUG    upload finished in 1.488535s, attributes: file_id=9ad51f537c01
upload:  81%|████████  | 856/1056 [02:36<00:43,  4.58it/s]2025-05-26 19:26:21,259 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bb8752ad8138.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:21,475 SpawnPoolWorker-37 DEBUG    upload finished in 1.555172s, attributes: file_id=9ff777d6147e
2025-05-26 19:26:21,476 SpawnPoolWorker-37 DEBUG    upload finished in 1.555871s, attributes: file_id=9ff777d6147e
upload:  81%|████████  | 857/1056 [02:36<00:43,  4.58it/s]2025-05-26 19:26:21,477 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/a265db1d1202.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:21,599 SpawnPoolWorker-35 DEBUG    upload finished in 1.399409s, attributes: file_id=700cbef98934
2025-05-26 19:26:21,599 SpawnPoolWorker-35 DEBUG    upload finished in 1.399944s, attributes: file_id=700cbef98934
upload:  81%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:21,805 SpawnPoolWorker-41 DEBUG    upload finished in 1.302085s, attributes: file_id=554e1d7d9220
2025-05-26 19:26:21,806 SpawnPoolWorker-41 DEBUG    upload finished in 1.302782s, attributes: file_id=554e1d7d9220
upload:  81%|████████▏ | 860/1056 [02:37<00:30,  6.48it/s]2025-05-26 19:26:21,808 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3e3caf8bc4f3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:21,894 SpawnPoolWorker-39 DEBUG    upload finished in 1.583917s, attributes: file_id=ed3aa19003e8
2025-05-26 19:26:21,894 SpawnPoolWorker-39 DEBUG    upload finished in 1.584629s, attributes: file_id=ed3aa19003e8
2025-05-26 19:26:21

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:22,326 SpawnPoolWorker-38 DEBUG    upload finished in 1.822674s, attributes: file_id=9c3abec38969
2025-05-26 19:26:22,326 SpawnPoolWorker-38 DEBUG    upload finished in 1.823677s, attributes: file_id=9c3abec38969
upload:  82%|████████▏ | 862/1056 [02:37<00:37,  5.13it/s]2025-05-26 19:26:22,329 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/15ff90b50959.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:22,575 SpawnPoolWorker-36 DEBUG    upload finished in 1.561848s, attributes: file_id=a2f0fce650e9
2025-05-26 19:26:22,576 SpawnPoolWorker-36 DEBUG    upload finished in 1.562503s, attributes: file_id=a2f0fce650e9
upload:  82%|████████▏ | 863/1056 [02:38<00:39,  4.84it/s]2025-05-26 19:26:22,578 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1c3ba7835fcc.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:22,982 SpawnPoolWorker-40 DEBUG    upload finished in 1.723931s, attributes: file_id=bb8752ad8138
2025-05-26 19:26:22,983 SpawnPoolWorker-40 DEBUG    upload finished in 1.724799s, attributes: file_id=bb8752ad8138
upload:  82%|████████▏ | 864/1056 [02:38<00:48,  3.94it/s]2025-05-26 19:26:22,988 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b492b0faf789.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:23,075 SpawnPoolWorker-37 DEBUG    upload finished in 1.598548s, attributes: file_id=a265db1d1202
2025-05-26 19:26:23,076 SpawnPoolWorker-37 DEBUG    upload finished in 1.599067s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:23,194 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e355cdf06261.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:23,296 SpawnPoolWorker-34 DEBUG    upload finished in 1.632144s, attributes: file_id=c032697c40c1
2025-05-26 19:26:23,296 SpawnPoolWorker-34 DEBUG    upload finished in 1.6326s, attributes: file_id=c032697c40c1
upload:  82%|████████▏ | 867/1056 [02:38<00:32,  5.75it/s]2025-05-26 19:26:23,299 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7ef8c3828c18.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:23,580 SpawnPoolWorker-39 DEBUG    upload finished in 1.684031s, attributes: file_id=96bafd594da0
2025-05-26 19:26:23,580 SpawnPoolWorker-39 DEBUG    upload finished in 1.684596s, attributes: file_id=96bafd594da0
upload:  82%|████████▏ | 869/1056 [02:39<00:30,  6.19it/s]2025-05-26 19:26:23,583 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/810dc473a59c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:24,170 SpawnPoolWorker-38 DEBUG    upload finished in 1.842066s, attributes: file_id=15ff90b50959
2025-05-26 19:26:24,171 SpawnPoolWorker-38 DEBUG    upload finished in 1.842929s, attributes: file_id=15ff90b50959
upload:  82%|████████▏ | 870/1056 [02:39<00:47,  3.92it/s]2025-05-26 19:26:24,174 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/21c478a4df0d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:24,237 SpawnPoolWorker-36 DEBUG    upload finished in 1.65907s, attributes: file_id=1c3ba7835fcc
2025-05-26 19:26:24,237 SpawnPoolWorker-36 DEBUG    upload finished in 1.659879s, attributes: file_id=1c3ba7835fcc
2025-05-26 19:26:24,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:24,682 SpawnPoolWorker-40 DEBUG    upload finished in 1.695941s, attributes: file_id=b492b0faf789
2025-05-26 19:26:24,682 SpawnPoolWorker-40 DEBUG    upload finished in 1.696917s, attributes: file_id=b492b0faf789
upload:  83%|████████▎ | 872/1056 [02:40<00:46,  3.92it/s]2025-05-26 19:26:24,686 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c94b66f933e2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:24,719 SpawnPoolWorker-41 DEBUG    upload finished in 1.395279s, attributes: file_id=b6e79680c9f0
2025-05-26 19:26:24,719 SpawnPoolWorker-41 DEBUG    upload finished in 1.395802s, attributes: file_id=b6e79680c9f0
2025-05-26 19:26:24

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:25,164 SpawnPoolWorker-34 DEBUG    upload finished in 1.866204s, attributes: file_id=7ef8c3828c18
2025-05-26 19:26:25,164 SpawnPoolWorker-34 DEBUG    upload finished in 1.866926s, attributes: file_id=7ef8c3828c18
upload:  83%|████████▎ | 876/1056 [02:40<00:33,  5.43it/s]2025-05-26 19:26:25,168 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/5eddf26368ed.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:25,267 SpawnPoolWorker-39 DEBUG    upload finished in 1.685013s, attribut

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:25,722 SpawnPoolWorker-38 DEBUG    upload finished in 1.549384s, attributes: file_id=21c478a4df0d
2025-05-26 19:26:25,723 SpawnPoolWorker-38 DEBUG    upload finished in 1.550013s, attributes: file_id=21c478a4df0d
upload:  83%|████████▎ | 878/1056 [02:41<00:41,  4.34it/s]2025-05-26 19:26:25,724 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7093271b4c77.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:26,114 SpawnPoolWorker-36 DEBUG    upload finished in 1.875332s, attributes: file_id=1ffaa33353d9
2025-05-26 19:26:26,115 SpawnPoolWorker-36 DEBUG    upload finished in 1.875904s, attributes: file_id=1ffaa33353d9
upload:  83%|████████▎ | 879/1056 [02:41<00:47,  3.72it/s]2025-05-26 19:26:26,116 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/67e401a633ad.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:26,179 SpawnPoolWorker-40 DEBUG    upload finished in 1.493496s, attributes: file_id=c94b66f933e2
2025-05-26 19:26:26,179 SpawnPoolWorker-40 DEBUG    upload finished in 1.494376s, attributes: file_id=c94b66f933e2
2025-05-26 19:26:26

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:26:26,441 SpawnPoolWorker-35 DEBUG    upload finished in 1.571976s, attributes: file_id=31ed8eb15b0d
2025-05-26 19:26:26,442 SpawnPoolWorker-35 DEBUG    upload finished in 1.572859s, attributes: file_id=31ed8eb15b0d
upload:  84%|████████▎ | 883/1056 [02:41<00:26,  6.53it/s]2025-05-26 19:26:26,446 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/015fe66c660b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:26,732 SpawnPoolWorker-34 DEBUG    upload finished in 1.565636s, attributes: file_id=5eddf26368ed
2025-05-26 19:26:26,733 SpawnPoolWorker-34 DEBUG    upload finished in 1.566251s, attributes: file_id=5eddf26368ed
upload:  84%|████████▎ | 884/1056 [02:42<00:31,  5.51it/s]2025-05-26 19:26:26,734 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/12c2233a0d93.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:27,234 SpawnPoolWorker-39 DEBUG    upload finished in 1.958043s, attributes: file_id=5e5d26cad0f3
2025-05-26 19:26:27,234 SpawnPoolWorker-39 DEBUG    upload finished in 1.959294s, attributes: file_id=5e5d26cad0f3
upload:  84%|████████▍ | 885/1056 [02:42<00:43,  3.93it/s]2025-05-26 19:26:27,237 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/58066225cd39.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:27,374 SpawnPoolWorker-38 DEBUG    upload finished in 1.649574s, attributes: file_id=7093271b4c77
2025-05-26 19:26:27,374 SpawnPoolWorker-38 DEBUG    upload finished in 1.650126s, attributes: file_id=7093271b4c77
upload:  84%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:27,706 SpawnPoolWorker-37 DEBUG    upload finished in 1.423335s, attributes: file_id=10217514a585
2025-05-26 19:26:27,707 SpawnPoolWorker-37 DEBUG    upload finished in 1.423925s, attributes: file_id=10217514a585
upload:  84%|████████▍ | 887/1056 [02:43<00:42,  3.94it/s]2025-05-26 19:26:27,709 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/53e5b114eb7a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:27,725 SpawnPoolWorker-36 DEBUG    upload finished in 1.608942s, attributes: file_id=67e401a633ad
2025-05-26 19:26:27,725 SpawnPoolWorker-36 DEBUG    upload finished in 1.609428s, attributes: file_id=67e401a633ad
2025-05-26 19:26:27,726 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:26:28,052 SpawnPoolWorker-40 DEBUG    upload finished in 1.871773s, attributes: file_id=4c7f03d092d4
2025-05-26 19:26:28,053 SpawnPoolWorker-40 DEBUG    upload finished in 1.872425s, attributes: file_id=4c7f03d092d4
upload:  84%|████████▍ | 891/1056 [02:43<00:24,  6.70it/s]2025-05-26 19:26:28,056 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/802d73cbaca8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:28,275 SpawnPoolWorker-34 DEBUG    upload finished in 1.541347s, attributes: file_id=12c2233a0d93
2025-05-26 19:26:28,275 SpawnPoolWorker-34 DEBUG    upload finished in 1.541851s, attributes: file_id=12c2233a0d93
upload:  84%|████████▍ | 892/1056 [02:43<00:27,  6.07it/s]2025-05-26 19:26:28,277 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1f44eee66104.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:28,797 SpawnPoolWorker-39 DEBUG    upload finished in 1.560565s, attributes: file_id=58066225cd39
2025-05-26 19:26:28,798 SpawnPoolWorker-39 DEBUG    upload finished in 1.561334s, attributes: file_id=58066225cd39
upload:  85%|████████▍ | 893/1056 [02:44<00:40,  4.03it/s]2025-05-26 19:26:28,801 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1386a83f83f1.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:29,001 SpawnPoolWorker-38 DEBUG    upload finished in 1.626358s, attributes: file_id=72b1bac2b13b
upload:  85%|████████▍ | 894/1056 [02:44<00:38,  4.21it/s]2025-05-26 19:26:29,002

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:29,007 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e033dab7ebf3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:29,340 SpawnPoolWorker-35 DEBUG    upload finished in 1.477189s, attributes: file_id=3dce009a5766
2025-05-26 19:26:29,341 SpawnPoolWorker-35 DEBUG    upload finished in 1.478054s, attributes: file_id=3dce009a5766
upload:  85%|████████▍ | 895/1056 [02:44<00:42,  3.79it/s]2025-05-26 19:26:29,345 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0ae5202f221f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:29,521 SpawnPoolWorker-37 DEBUG    upload finished in 1.812797s, attributes: file_id=53e5b114eb7a
2025-05-26 19:26:29,521 SpawnPoolWorker-37 DEBUG    upload finished in 1.813491s,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:29,675 SpawnPoolWorker-41 DEBUG    upload finished in 1.844543s, attributes: file_id=1f1f8bc1ff62
2025-05-26 19:26:29,675 SpawnPoolWorker-41 DEBUG    upload finished in 1.845178s, attributes: file_id=1f1f8bc1ff62
upload:  85%|████████▌ | 898/1056 [02:45<00:26,  5.91it/s]2025-05-26 19:26:29,678 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9a547fbc2160.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:30,068 SpawnPoolWorker-34 DEBUG    upload finished in 1.790913s, attributes: file_id=1f44eee66104
2025-05-26 19:26:30,068 SpawnPoolWorker-34 DEBUG    upload finished in 1.791451s, attributes: file_id=1f44eee66104
upload:  85%|████████▌ | 900/1056 [02:45<00:29,  5.34it/s]2025-05-26 19:26:30,070 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/530e84d15e4a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:30,392 SpawnPoolWorker-39 DEBUG    upload finished in 1.592877s, attributes: file_id=1386a83f83f1
2025-05-26 19:26:30,393 SpawnPoolWorker-39 DEBUG    upload finished in 1.593436s, attributes: file_id=1386a83f83f1
upload:  85%|████████▌ | 901/1056 [02:45<00:34,  4.47it/s]2025-05-26 19:26:30,395 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/403eeb228e8b.json not detected as batch file data
Removed trailing semicolon and whitespace from query
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:30,641 SpawnPoolWorker-38 DEBUG    upload finished in 1.635733s, attributes: file_id=e033dab7ebf3
2025-05-26 19:26:30,641 SpawnPoolWorker-38 DEBUG    upload finished in 1.636452s, attributes: file_id=e033dab7ebf3
upload:  85%|████████▌ | 902/1056 [02:46<00:35,  4.34it/s]2025-05-26 19:26:30,643 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/49daa4ccb643.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:30,787 SpawnPoolWorker-35 DEBUG    upload finished in 1.443677s, attributes: file_id=0ae5202f221f
2025-05-26 19:26:30,788 SpawnPoolWorker-35 DEBUG    upload finished in 1.44459s, attributes: file_id=0ae5202f221f
upload:  86%|████████▌ | 903/1056 [02:46<00:31,  4.83it/s]2025-05-26 19:2

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:30,954 SpawnPoolWorker-41 DEBUG    upload finished in 1.276782s, attributes: file_id=9a547fbc2160
2025-05-26 19:26:30,954 SpawnPoolWorker-41 DEBUG    upload finished in 1.277392s, attributes: file_id=9a547fbc2160
upload:  86%|████████▌ | 904/1056 [02:46<00:29,  5.12it/s]2025-05-26 19:26:30,957 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c1bfe9457276.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:31,110 SpawnPoolWorker-36 DEBUG    upload finished in 1.565386s, attributes: file_id=7dcc796fee8e
2025-05-26 19:26:31,111 SpawnPoolWorker-36 DEBUG    upload finished in 1.566098s, attributes: file_id=7dcc796fee8e
upload:  86%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:31,201 SpawnPoolWorker-37 DEBUG    upload finished in 1.67785s, attributes: file_id=41514ae129ae
2025-05-26 19:26:31,201 SpawnPoolWorker-37 DEBUG    upload finished in 1.678461s, attributes: file_id=41514ae129ae
2025-05-26 19:26:31,204 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1dfc4744b11f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:31,748 SpawnPoolWorker-34 DEBUG    upload finished in 1.678192s, attributes: file_id=530e84d15e4a
2025-05-26 19:26:31,748 SpawnPoolWorker-34 DEBUG    upload finished in 1.679224s, attributes: file_id=530e84d15e4a
upload:  86%|████████▌ | 908/1056 [02:47<00:29,  5.00it/s]2025-05-26 19:26:31,753 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/323976cd72fb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:32,007 SpawnPoolWorker-39 DEBUG    upload finished in 1.613049s, attributes: file_id=403eeb228e8b
2025-05-26 19:26:32,007 SpawnPoolWorker-39 DEBUG    upload finished in 1.613573s, attributes: file_id=403eeb228e8b
upload:  86%|████████▌ | 909/1056 [02:47<00:31,  4.71it/s]2025-05-26 19:26:32,010 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b17b1e9ca6fe.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:32,346 SpawnPoolWorker-38 DEBUG    upload finished in 1.703641s, attributes: file_id=49daa4ccb643
2025-05-26 19:26:32,346 SpawnPoolWorker-38 DEBUG    upload finished in 1.704186s, attributes: file_id=49daa4ccb643
upload:  86%|████████▌ | 910/1056 [02:47<00:35,  4.14it/s]2025-05-26 19:26:32,349 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c9d9fcf05b72.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:32,372 SpawnPoolWorker-40 DEBUG    upload finished in 1.232693s, attributes: file_id=c354fce2fc58
2025-05-26 19:26:32,372 SpawnPoolWorker-40 DEBUG    upload finished in 1.233232s, attributes: file_id=c354fce2fc58
2025-05-26 19:26:32,374 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:33,211 SpawnPoolWorker-34 DEBUG    upload finished in 1.459887s, attributes: file_id=323976cd72fb
2025-05-26 19:26:33,211 SpawnPoolWorker-34 DEBUG    upload finished in 1.460691s, attributes: file_id=323976cd72fb
upload:  87%|████████▋ | 916/1056 [02:48<00:25,  5.58it/s]2025-05-26 19:26:33,215 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3aa8e56b6c5a.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:33,597 SpawnPoolWorker-39 DEBUG    upload finished in 1.588603s, attributes: file_id=b17b1e9ca6fe
2025-05-26 19:26:33,598 SpawnPoolWorker-39 DEBUG    upload finished in 1.589172s, attributes: file_id=b17b1e9ca6fe
upload:  87%|████████▋ | 917/1056 [02:49<00:29,  4.72it/s]2025-05-26 19:26:33,601 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d51372b81f93.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:33,657 SpawnPoolWorker-40 DEBUG    upload finished in 1.282924s, attributes: file_id=ae9fb7b7832d
2025-05-26 19:26:33,657 SpawnPoolWorker-40 DEBUG    upload finished in 1.283555s, attributes: file_id=ae9fb7b7832d
2025-05-26 19:26:33

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:26:33,926 SpawnPoolWorker-37 DEBUG    upload finished in 1.454065s, attributes: file_id=d7cb146de1df
2025-05-26 19:26:33,927 SpawnPoolWorker-37 DEBUG    upload finished in 1.455655s, attributes: file_id=d7cb146de1df
upload:  87%|████████▋ | 921/1056 [02:49<00:19,  6.84it/s]2025-05-26 19:26:33,930 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c4f3c133218d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:33,937 SpawnPoolWorker-41 DEBUG    upload finished in 1.485178s, attributes: file_id=0e97a376a3a1
2025-05-26 19:26:33,937 SpawnPoolWorker-41 DEBUG    upload finished in 1.485818s, attributes: file_id=0e97a376a3a1
2025-05-26 19:26:33,939 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:34,596 SpawnPoolWorker-34 DEBUG    upload finished in 1.38218s, attributes: file_id=3aa8e56b6c5a
2025-05-26 19:26:34,597 SpawnPoolWorker-34 DEBUG    upload finished in 1.383612s, attributes: file_id=3aa8e56b6c5a
upload:  88%|████████▊ | 924/1056 [02:50<00:23,  5.66it/s]2025-05-26 19:26:34,599 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/76e5232738e3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:35,282 SpawnPoolWorker-40 DEBUG    upload finished in 1.623129s, attributes: file_id=66133cffe6b9
2025-05-26 19:26:35,284 SpawnPoolWorker-40 DEBUG    upload finished in 1.625357s, attributes: file_id=66133cffe6b9
upload:  88%|████████▊ | 925/1056 [02:50<00:33,  3.87it/s]2025-05-26 19:26:35,287 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7bf1c8473298.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:35,323 SpawnPoolWorker-38 DEBUG    upload finished in 1.605801s, attributes: file_id=815c7025997d
2025-05-26 19:26:35,323 SpawnPoolWorker-38 DEBUG    upload finished in 1.606421s, attributes: file_id=815c7025997d
2025-05-26 19:26:35

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:35,538 SpawnPoolWorker-41 DEBUG    upload finished in 1.599597s, attributes: file_id=d67ea5b60746
2025-05-26 19:26:35,538 SpawnPoolWorker-41 DEBUG    upload finished in 1.600388s, attributes: file_id=d67ea5b60746
upload:  88%|████████▊ | 928/1056 [02:51<00:23,  5.40it/s]2025-05-26 19:26:35,540 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/848da5d5478f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:35,638 SpawnPoolWorker-36 DEBUG    upload finished in 1.668171s, attributes: file_id=da5af3ec5a48
2025-05-26 19:26:35,638 SpawnPoolWorker-36 DEBUG    upload finished in 1.669156s, attributes: file_id=da5af3ec5a48
upload:  88%|████████▊ | 929/1056 [02:51<00:21,  5.97it/s]2025-05-26 19:

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:35,747 SpawnPoolWorker-39 DEBUG    upload finished in 2.147258s, attributes: file_id=d51372b81f93
2025-05-26 19:26:35,748 SpawnPoolWorker-39 DEBUG    upload finished in 2.148123s, attributes: file_id=d51372b81f93
upload:  88%|████████▊ | 931/1056 [02:51<00:15,  7.96it/s]2025-05-26 19:26:35,750 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f46cf4c668cc.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:36,085 SpawnPoolWorker-34 DEBUG    upload finished in 1.486766s, attributes: file_id=76e5232738e3
2025-05-26 19:26:36,087 SpawnPoolWorker-34 DEBUG    upload finished in 1.488963s, attributes: file_id=76e5232738e3
2025-05-26 19:26:36,102 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3d125e9c43fd.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:36,713 SpawnPoolWorker-35 DEBUG    upload finished in 1.31346s, attributes: file_id=aea53f425a64
2025-05-26 19:26:36,714 SpawnPoolWorker-35 DEBUG    upload finished in 1.314637s, attributes: file_id=aea53f425a64
upload:  88%|████████▊ | 933/1056 [02:52<00:30,  4.00it/s]2025-05-26 19:26:36,724 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e7c14de8e5f5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:36,769 SpawnPoolWorker-40 DEBUG    upload finished in 1.482474s, attributes: file_id=7bf1c8473298
2025-05-26 19:26:36,775 SpawnPoolWorker-40 DEBUG    upload finished in 1.488809s, attributes: file_id=7bf1c8473298
2025-05-26 19:26:36,

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:37,025 SpawnPoolWorker-37 DEBUG    upload finished in 1.351469s, attributes: file_id=9ffe4f533279
2025-05-26 19:26:37,028 SpawnPoolWorker-37 DEBUG    upload finished in 1.354275s, attributes: file_id=9ffe4f533279
2025-05-26 19:26:37,032 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/02ae46f34c5b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:37,144 SpawnPoolWorker-41 DEBUG    upload finished in 1.604166s, attributes: file_id=848da5d5478f
2025-05-26 19:26:37,146 SpawnPoolWorker-41 DEBUG    upload finished in 1.605832s, attributes: file_id=848da5d5478f
upload:  89%|████████▊ | 937/1056 [02:52<00:21,  5.67it/s]2025-05-26 19:26:37,155 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:37,259 SpawnPoolWorker-36 DEBUG    upload finished in 1.618982s, attributes: file_id=8c06295f836c
2025-05-26 19:26:37,260 SpawnPoolWorker-36 DEBUG    upload finished in 1.619954s, attributes: file_id=8c06295f836c
upload:  89%|████████▉ | 939/1056 [02:52<00:16,  7.20it/s]2025-05-26 19:26:37,263 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bfa980dba4b2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:37,625 SpawnPoolWorker-34 DEBUG    upload finished in 1.525802s, attributes: file_id=3d125e9c43fd
2025-05-26 19:26:37,626 SpawnPoolWorker-34 DEBUG    upload finished in 1.527868s, attributes: file_id=3d125e9c43fd
2025-05-26 19:26:37,628 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f1800f177ad8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:38,325 SpawnPoolWorker-38 DEBUG    upload finished in 1.474979s, attributes: file_id=dba34574fe25
2025-05-26 19:26:38,325 SpawnPoolWorker-38 DEBUG    upload finished in 1.476058s, attributes: file_id=dba34574fe25
upload:  89%|████████▉ | 941/1056 [02:53<00:30,  3.83it/s]2025-05-26 19:26:38,328 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/87340de2ad34.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:38,405 SpawnPoolWorker-35 DEBUG    upload finished in 1.682493s, attributes: file_id=e7c14de8e5f5
2025-05-26 19:26:38,406 SpawnPoolWorker-35 DEBUG    upload finished in 1.683875s, attributes: file_id=e7c14de8e5f5
2025-05-26 19:26:38

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:38,693 SpawnPoolWorker-37 DEBUG    upload finished in 1.661662s, attributes: file_id=02ae46f34c5b
2025-05-26 19:26:38,695 SpawnPoolWorker-37 DEBUG    upload finished in 1.664779s, attributes: file_id=02ae46f34c5b
upload:  89%|████████▉ | 944/1056 [02:54<00:22,  4.89it/s]2025-05-26 19:26:38,698 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c94d661d5115.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:38,919 SpawnPoolWorker-41 DEBUG    upload finished in 1.765363s, attributes: file_id=1f6f9db87828
2025-05-26 19:26:38,919 SpawnPoolWorker-41 DEBUG    upload finished in 1.766208s, attributes: file_id=1f6f9db87828
upload:  89%|████████▉ | 945/1056 [02:54<00:23,  4.80it/s]2025-05-26 19:26:38,922 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ac6dcb38a7cb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:39,039 SpawnPoolWorker-36 DEBUG    upload finished in 1.776916s, attributes: file_id=bfa980dba4b2
2025-05-26 19:26:39,039 SpawnPoolWorker-36 DEBUG    upload finished in 1.777508s, attributes: file_id=bfa980dba4b2
upload:  90%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:39,122 SpawnPoolWorker-39 DEBUG    upload finished in 1.91716s, attributes: file_id=cb9c6f375afa
2025-05-26 19:26:39,124 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/cf15afbd0508.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:39,188 SpawnPoolWorker-34 DEBUG    upload finished in 1.560539s, attributes: file_id=f1800f177ad8
2025-05-26 19:26:39,189 SpawnPoolWorker-34 DEBUG    upload finished in 1.561077s, attributes: file_id=f1800f177ad8
upload:  90%|████████▉ | 948/1056 [02:54<00:15,  6.95it/s]2025-05-26 19:26:39,190 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/d7e4a51e9555.json not detected as batch file data
A value is trying to be set

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:40,016 SpawnPoolWorker-38 DEBUG    upload finished in 1.68875s, attributes: file_id=87340de2ad34
2025-05-26 19:26:40,016 SpawnPoolWorker-38 DEBUG    upload finished in 1.68954s, attributes: file_id=87340de2ad34
upload:  90%|████████▉ | 949/1056 [02:55<00:31,  3.41it/s]2025-05-26 19:26:40,019 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/82e2d5777ae0.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:40,117 SpawnPoolWorker-35 DEBUG    upload finished in 1.709038s, attributes: file_id=de8af734009e
2025-05-26 19:26:40,118 SpawnPoolWorker-35 DEBUG    upload finished in 1.710165s, a

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:40,276 SpawnPoolWorker-37 DEBUG    upload finished in 1.578561s, attributes: file_id=c94d661d5115
2025-05-26 19:26:40,276 SpawnPoolWorker-37 DEBUG    upload finished in 1.579159s, attributes: file_id=c94d661d5115
upload:  90%|█████████ | 952/1056 [02:55<00:18,  5.57it/s]2025-05-26 19:26:40,278 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/48c0b3e2ce20.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:40,339 SpawnPoolWorker-41 DEBUG    upload finished in 1.418529s, attributes: file_id=ac6dcb38a7cb
2025-05-26 19:26:40,340 SpawnPoolWorker-41 DEBUG    upload finished in 1.41923s, attributes: file_id=ac6dcb38a7cb
2025-05-26 19:26:40,341 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:40,523 SpawnPoolWorker-36 DEBUG    upload finished in 1.482614s, attributes: file_id=a9257a8cab40
2025-05-26 19:26:40,524 SpawnPoolWorker-36 DEBUG    upload finished in 1.483459s, attributes: file_id=a9257a8cab40
upload:  90%|█████████ | 954/1056 [02:56<00:16,  6.28it/s]2025-05-26 19:26:40,527 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b5f2de71a33d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:40,771 SpawnPoolWorker-34 DEBUG    upload finished in 1.580615s, attributes: file_id=d7e4a51e9555
2025-05-26 19:26:40,771 SpawnPoolWorker-34 DEBUG    upload finished in 1.581089s, attributes: file_id=d7e4a51e9555
upload:  90%|█████████ | 955/1056 [02:56<00:17,  5.64it/s]2025-05-26 19:26:40,772 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2383d4af9a74.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:40,961 SpawnPoolWorker-39 DEBUG    upload finished in 1.837785s, attributes: file_id=cf15afbd0508
2025-05-26 19:26:40,962 SpawnPoolWorker-39 DEBUG    upload finished in 1.838718s, attributes: file_id=cf15afbd0508
upload:  91%|█████████ | 956/1056 [02:56<00:18,  5.54it/s]2025-05-26 19:

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:41,687 SpawnPoolWorker-38 DEBUG    upload finished in 1.668973s, attributes: file_id=82e2d5777ae0
2025-05-26 19:26:41,688 SpawnPoolWorker-38 DEBUG    upload finished in 1.669712s, attributes: file_id=82e2d5777ae0
upload:  91%|█████████ | 957/1056 [02:57<00:31,  3.18it/s]2025-05-26 19:26:41,692 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/03971e548d56.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:41,841 SpawnPoolWorker-41 DEBUG    upload finished in 1.500177s, attributes: file_id=a5abf0bc0aad
2025-05-26 19:26:41,842 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:41,923 SpawnPoolWorker-40 DEBUG    upload finished in 1.74611s, attributes: file_id=3bb2acdd241f
2025-05-26 19:26:41,923 SpawnPoolWorker-35 DEBUG    upload finished in 1.802553s, attributes: file_id=16cf1f72cb35
2025-05-26 19:26:41,924 SpawnPoolWorker-40 DEBUG    upload finished in 1.746939s, attributes: file_id=3bb2acdd241f
2025-05-26 19:26:41,924 SpawnPoolWorker-35 DEBUG    upload finished in 1.803411s, attributes: file_id=16cf1f72cb35
2025-05-26 19:26:41,927 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/dc7f0ce45582.json not detected as batch file data
2025-05-26 19:26:41,927 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0e4662eebb9f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace(

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:42,190 SpawnPoolWorker-36 DEBUG    upload finished in 1.663924s, attributes: file_id=b5f2de71a33d
2025-05-26 19:26:42,191 SpawnPoolWorker-36 DEBUG    upload finished in 1.664481s, attributes: file_id=b5f2de71a33d
upload:  91%|█████████ | 962/1056 [02:57<00:15,  6.19it/s]2025-05-26 19:26:42,193 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2d30ea8f0676.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:42,263 SpawnPoolWorker-34 DEBUG    upload finished in 1.491481s, attributes: file_id=2383d4af9a74
2025-05-26 19:26:42,264 SpawnPoolWorker-34 DEBUG    upload finished in 1.492119s, attributes: file_id=2383d4af9a74
2025-05-26 19:26:42

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:43,006 SpawnPoolWorker-38 DEBUG    upload finished in 1.316196s, attributes: file_id=03971e548d56
2025-05-26 19:26:43,008 SpawnPoolWorker-38 DEBUG    upload finished in 1.318564s, attributes: file_id=03971e548d56
2025-05-26 19:26:43,011 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/0efafe00f4fb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:43,126 SpawnPoolWorker-41 DEBUG    upload finished in 1.281958s, attributes: file_id=4532d2fa38f1
2025-05-26 19:26:43,127 SpawnPoolWorker-41 DEBUG    upload finished in 1.282853s, attr

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:43,210 SpawnPoolWorker-40 DEBUG    upload finished in 1.28388s, attributes: file_id=0e4662eebb9f
2025-05-26 19:26:43,214 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ea6e1b037937.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:43,226 SpawnPoolWorker-37 DEBUG    upload finished in 1.188748s, attributes: file_id=fd226dd04ca7
2025-05-26 19:26:43,227 SpawnPoolWorker-37 DEBUG    upload finished in 1.189495s, attributes: file_id=fd226dd04ca7
2025-05-26 19:26:43,229 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/818b1e43e105.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:43,502 SpawnPoolWorker-39 DEBUG    upload finished in 1.201663s, attributes: file_id=2576f0ca614c
2025-05-26 19:26:43,502 SpawnPoolWorker-39 DEBUG    upload finished in 1.202191s, attributes: file_id=2576f0ca614c
2025-05-26 19:26:43,504 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8a83f44c1b01.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:43,653 SpawnPoolWorker-34 DEBUG    upload finished in 1.387976s, attributes: file_id=a3823fa5e977
2025-05-26 19:26:43,654 SpawnPoolWorker-34 DEBUG    upload finished in 1.388604s, attributes: file_id=a3823fa5e977
upload:  92%|█████████▏| 971/1056 [02:59<00:13,  6.17it/s]2025-05-26 19:26:43,657 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cach

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:43,741 SpawnPoolWorker-36 DEBUG    upload finished in 1.549502s, attributes: file_id=2d30ea8f0676
2025-05-26 19:26:43,742 SpawnPoolWorker-36 DEBUG    upload finished in 1.550098s, attributes: file_id=2d30ea8f0676
2025-05-26 19:26:43,744 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9968b9eef265.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:44,421 SpawnPoolWorker-38 DEBUG    upload finished in 1.410149s, attributes: file_id=0efafe00f4fb
2025-05-26 19:26:44,421 SpawnPoolWorker-38 DEBUG    upload finished in 1.41118s, attributes: file_id=0efafe00f4fb
upload:  92%|█████████▏| 973/1056 [02:59<00:18,  4.41it/s]2025-05-26 19:26:44,426 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/b46884a15095.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:44,591 SpawnPoolWorker-41 DEBUG    upload finished in 1.462194s, attributes: file_id=cf86e6719f28
2025-05-26 19:26:44,591 Spawn

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:44,778 SpawnPoolWorker-37 DEBUG    upload finished in 1.549294s, attributes: file_id=818b1e43e105
2025-05-26 19:26:44,778 SpawnPoolWorker-40 DEBUG    upload finished in 1.565198s, attributes: file_id=ea6e1b037937
2025-05-26 19:26:44,778 SpawnPoolWorker-37 DEBUG    upload finished in 1.550305s, attributes: file_id=818b1e43e105
2025-05-26 19:26:44,778 SpawnPoolWorker-40 DEBUG    upload finished in 1.566343s, attributes: file_id=ea6e1b037937
upload:  92%|█████████▏| 975/1056 [03:00<00:17,  4.74it/s]2025-05-26 19:26:44,782 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/782de5f7d99c.json not detected as batch file data
2025-05-26 19:26:44,782 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/790d529bf601.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: ht

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:45,117 SpawnPoolWorker-34 DEBUG    upload finished in 1.461332s, attributes: file_id=d6787d83aa43
2025-05-26 19:26:45,117 SpawnPoolWorker-34 DEBUG    upload finished in 1.461948s, attributes: file_id=d6787d83aa43
upload:  93%|█████████▎| 978/1056 [03:00<00:12,  6.02it/s]2025-05-26 19:26:45,120 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c7c14fd0a569.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:45,248 SpawnPoolWorker-36 DEBUG    upload finished in 1.504845s, attributes: file_id=9968b9eef265
2025-05-26 19:26:45,248 SpawnPoolWorker-36 DEBUG    upload finished in 1.505345s, attributes: file_id=9968b9eef265
upload:  93%|██████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:45,337 SpawnPoolWorker-39 DEBUG    upload finished in 1.834377s, attributes: file_id=8a83f44c1b01
2025-05-26 19:26:45,338 SpawnPoolWorker-39 DEBUG    upload finished in 1.834883s, attributes: file_id=8a83f44c1b01
2025-05-26 19:26:45,340 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7943c3b3063d.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:46,083 SpawnPoolWorker-38 DEBUG    upload finished in 1.658569s, attributes: file_id=b46884a15095
2025-05-26 19:26:46,083 SpawnPoolWorker-38 DEBUG    upload finished in 1.659735s, attributes: file_id=b46884a15095
upload:  93%|█████████▎| 981/1056 [03:01<00:18,  4.05it/s]2025-05-26 19:26:46,088 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c38c295848f5.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:46,336 SpawnPoolWorker-37 DEBUG    upload finished in 1.55427s, attributes: file_id=782de5f7d99c
2025-05-26 19:26:46,336 SpawnPoolWorker-37 DEBUG    upload finished in 1.554973s, attributes: file_id=782de5f7d99c
upload:  93%|█████████▎| 982/1056 [03:01<00:18,  4.03it/s]2025-05-26 19:26:46,339 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bd9abb0e8a19.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:46,403 SpawnPoolWorker-35 DEBUG    upload finished in 1.611551s, attributes: file_id=41a80c9a1208
2025-05-26 19:26:46,403 SpawnPoolWorker-35 DEBUG    upload finished in 1.612138s, attributes: file_id=41a80c9a1208
2025-05-26 19:26:46,405 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:46,543 SpawnPoolWorker-41 DEBUG    upload finished in 1.94983s, attributes: file_id=115f987144b2
2025-05-26 19:26:46,544 SpawnPoolWorker-41 DEBUG    upload finished in 1.950777s, attributes: file_id=115f987144b2
upload:  93%|█████████▎| 984/1056 [03:02<00:14,  5.09it/s]2025-05-26 19:26:46,548 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/140f77dbac3f.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:46,622 SpawnPoolWorker-34 DEBUG    upload finished in 1.502843s, attributes: file_id=c7c14fd0a569
2025-05-26 19:26:46,622 SpawnPoolWorker-34 DEBUG    upload finished in 1.503377s, attributes: file_id=c7c14fd0a569
2025-05-26 19:26:46,624 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:46,834 SpawnPoolWorker-40 DEBUG    upload finished in 2.052236s, attributes: file_id=790d529bf601
2025-05-26 19:26:46,834 SpawnPoolWorker-40 DEBUG    upload finished in 2.053044s, attributes: file_id=790d529bf601
2025-05-26 19:26:46,837 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/053fd2f9fb06.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Remov

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:47,986 SpawnPoolWorker-38 DEBUG    upload finished in 1.899527s, attributes: file_id=c38c295848f5
2025-05-26 19:26:47,989 SpawnPoolWorker-38 DEBUG    upload finished in 1.903996s, attributes: file_id=c38c295848f5
upload:  94%|█████████▎| 989/1056 [03:03<00:21,  3.18it/s]2025-05-26 19:26:47,995 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/005ade0dd5ba.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:48,076 SpawnPoolWorker-37 DEBUG    upload finished in 1.738398s, attributes: file_id=bd9abb0e8a19
2025-05-26 19:26:48,077 Spaw

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:48,202 SpawnPoolWorker-41 DEBUG    upload finished in 1.65392s, attributes: file_id=140f77dbac3f
2025-05-26 19:26:48,202 SpawnPoolWorker-41 DEBUG    upload finished in 1.654771s, attributes: file_id=140f77dbac3f
2025-05-26 19:26:48,206 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/f603380e7977.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:48,275 SpawnPoolWorker-35 DEBUG    upload finished in 1.871077s, attributes: file_id=0366b7eda3f4
2025-05-26 19:26:48,276 SpawnPoolWorker-35 DEBUG    upload finished in 1.871568s, attributes: file_id=0366b7eda3f4
upload:  94%|█████████▍| 993/1056 [03:03<00:11,  5.72it/s]2025-05-26 19:26:48,278 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:48,499 SpawnPoolWorker-36 DEBUG    upload finished in 1.753685s, attributes: file_id=153f35b465d3
2025-05-26 19:26:48,499 SpawnPoolWorker-36 DEBUG    upload finished in 1.754275s, attributes: file_id=153f35b465d3
2025-05-26 19:26:48,502 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/09869e47499c.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:48,615 SpawnPoolWorker-40 DEBUG    upload finished in 1.778927s, attributes: file_id=053fd2f9fb06
2025-05-26 19:26:48,615 SpawnPool

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:48,747 SpawnPoolWorker-39 DEBUG    upload finished in 1.82711s, attributes: file_id=c3b53ea5ec1d
2025-05-26 19:26:48,747 SpawnPoolWorker-39 DEBUG    upload finished in 1.827598s, attributes: file_id=c3b53ea5ec1d
upload:  94%|█████████▍| 996/1056 [03:04<00:09,  6.05it/s]2025-05-26 19:26:48,749 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bf302c847496.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:49,624 SpawnPoolWorker-37 DEBUG    upload finished in 1.544995s, attributes: file_id=7fccc6e4983e
2025-05-26 19:26:49,626 SpawnPoolWorker-37 DEBUG    upload finished in 1.547183s, attributes: file_id=7fccc6e4983e
upload:  94%|█████████▍| 997/1056 [03:05<00:18,  3.16it/s]2025-05-26 19:26:49,629 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e6dc8e8655e8.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:49,637 SpawnPoolWorker-38 DEBUG    upload finished in 1.643716s, attributes: file_id=005ade0dd5ba
2025-05-26 19:26:49,637 SpawnPoolWorker-38 DEBUG    upload finished in 1.644629s, attributes: file_id=005ade0dd5ba
2025-05-26 19:26:49

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:49,837 SpawnPoolWorker-35 DEBUG    upload finished in 1.560078s, attributes: file_id=70f7ccdb3aec
2025-05-26 19:26:49,838 SpawnPoolWorker-35 DEBUG    upload finished in 1.560931s, attributes: file_id=70f7ccdb3aec
2025-05-26 19:26:49,840 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/3fb64c7a0440.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:50,019 SpawnPoolWorker-41 DEBUG    upload finished in 1.813754s, attributes: file_id=f603380e7977
2025-05-26 19:26:50,019 SpawnPoolWorker-41 DEBUG    upload finished in 1.814781s, attributes: file_id=f603380e7977
upload:  95%|█████████▍|

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:50,098 SpawnPoolWorker-36 DEBUG    upload finished in 1.597761s, attributes: file_id=09869e47499c
2025-05-26 19:26:50,099 SpawnPoolWorker-36 DEBUG    upload finished in 1.598246s, attributes: file_id=09869e47499c
2025-05-26 19:26:50,100 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/92426fc857cb.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:50,266 SpawnPoolWorker-40 DEBUG    upload finished in 1.649532s, attributes: file_id=1d8b71c0671c
2025-05-26 19:26:50,266 SpawnPoolWorker-40 DEBUG    upload finished in 1.650058s, attributes: file_id=1d8b71c0671c
upload:  95%|█████████▍| 1003/1056 [03:05<00:08,  6.02it/s]2025-05-26 19:26:5

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:50,375 SpawnPoolWorker-39 DEBUG    upload finished in 1.627127s, attributes: file_id=bf302c847496
2025-05-26 19:26:50,376 SpawnPoolWorker-39 DEBUG    upload finished in 1.627605s, attributes: file_id=bf302c847496
upload:  95%|█████████▌| 1004/1056 [03:05<00:08,  6.44it/s]2025-05-26 19:26:50,377 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9a601b572f32.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:51,060 SpawnPoolWorker-37 DEBUG    upload finished in 1.432085s, attributes: file_id=e6dc8e8655e8
2025-05-26 19:26:51,061 SpawnPoolWorker-37 DEBUG    upload finished in 1.433353s, attributes: file_id=e6dc8e8655e8
upload:  95%|█████████▌| 1005/1056 [03:06<00:13,  3.71it/s]2025-05-26 19:26:51,066 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/62d5fef55413.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:51,275 SpawnPoolWorker-38 DEBUG    upload finished in 1.636302s, attributes: file_id=28f99b43c57f
2025-05-26 19:26:51,276 SpawnPoolWorker-38 DEBUG    upload finished in 1.637252s, attributes: file_id=28f99b43c57f
upload:  95%|█████████▌| 1006/1056 [03:06<00:12,  3.90it/s]2025-05-26 19:26:51,280 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/6c58f37dca21.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:51,372 SpawnPoolWorker-34 DEBUG    upload finished in 1.563077s, attributes: file_id=4672c1860e4a
2025-05-26 19:26:51,373 SpawnPoolWorker-34 DEBUG    upload finished in 1.563631s, attributes: file_id=4672c1860e4a
2025-05-26 19:26:51,374 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cac

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:26:51,632 SpawnPoolWorker-41 DEBUG    upload finished in 1.610365s, attributes: file_id=673879e45694
2025-05-26 19:26:51,633 SpawnPoolWorker-41 DEBUG    upload finished in 1.611284s, attributes: file_id=673879e45694
upload:  96%|█████████▌| 1010/1056 [03:07<00:07,  6.51it/s]2025-05-26 19:26:51,636 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4f1dd08f7cd9.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:51,787 SpawnPoolWorker-40 DEBUG    upload finished in 1.519404s, attributes: file_id=0e20eb173ba9
2025-05-26 19:26:51,788 Spa

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:51,867 SpawnPoolWorker-39 DEBUG    upload finished in 1.490671s, attributes: file_id=9a601b572f32
2025-05-26 19:26:51,868 SpawnPoolWorker-39 DEBUG    upload finished in 1.491161s, attributes: file_id=9a601b572f32
2025-05-26 19:26:51,869 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8cecb8c96ef7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:52,543 SpawnPoolWorker-37 DEBUG    upload finished in 1.477904s, attributes: file_id=62d5fef55413
2025-05-26 19:26:52,544 SpawnPoolWorker-37 DEBUG    upload finished in 1.479589s, attributes: file_id=62d5fef55413
upload:  96%|█████████▌| 1013/1056 [03:08<00:10,  4.22it/s]2025-05-26 19:26:52,548 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bafd2d1ff737.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:52,770 SpawnPoolWorker-38 DEBUG    upload finished in 1.491524s, attributes: file_id=6c58f37dca21
2025-05-26 19:26:52,770 SpawnPoolWorker-38 DEBUG    upload finished in 1.492196s, attributes: file_id=6c58f37dca21
upload:  96%|█████████▌| 1014/1056 [03:08<00:09,  4.26it/s]2025-05-26 19:26:52,772 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/7748edb5e10b.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:52,994 SpawnPoolWorker-34 DEBUG    upload finished in 1.619981s, attributes: file_id=6d4d881bb905
2025-05-26 19:26:52,994 SpawnPoolWorker-34 DEBUG    upload finished in 1.620476s, attributes: file_id=6d4d881bb905
upload:  96%|█████████▌| 1015/1056 [03:08<00:09,  4.30it/s]2025-05-26 19:26:52,996 SpawnPoolWorker-34 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/1abf7e180021.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:53,068 SpawnPoolWorker-35 DEBUG    upload finished in 1.627148s, attributes: file_id=96d8d7883aa0
2025-05-26 19:26:53,068 SpawnPoolWorker-35 DEBUG    upload finished in 1.627749s, attributes: file_id=96d8d7883aa0
2025-05-26 19:26:5

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:26:53,249 SpawnPoolWorker-41 DEBUG    upload finished in 1.614254s, attributes: file_id=4f1dd08f7cd9
2025-05-26 19:26:53,250 SpawnPoolWorker-41 DEBUG    upload finished in 1.615101s, attributes: file_id=4f1dd08f7cd9
upload:  96%|█████████▋| 1019/1056 [03:08<00:04,  7.76it/s]2025-05-26 19:26:53,252 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/13c137ebd4aa.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:53,322 SpawnPoolWorker-40 DEBUG    upload finished in 1.533278s, attributes: file_id=39e8dfc6bb82
2025-05-26 19:26:53,323 SpawnPoolWorker-40 DEBUG    upload finished in 1.533854s, attributes: file_id=39e8dfc6bb82
2025-05-26 19:26:53,325 SpawnPoolWorker-40 DEBUG    /Users/liangmo/.cac

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:54,109 SpawnPoolWorker-38 DEBUG    upload finished in 1.337911s, attributes: file_id=7748edb5e10b
2025-05-26 19:26:54,110 SpawnPoolWorker-38 DEBUG    upload finished in 1.339094s, attributes: file_id=7748edb5e10b
upload:  97%|█████████▋| 1021/1056 [03:09<00:08,  4.32it/s]2025-05-26 19:26:54,113 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4bdba5812315.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:54,164 SpawnPoolWorker-37 DEBUG    upload finished in 1.617832s, attributes: file_id=bafd2d1ff737
2025-05-26 19:26:54,166 SpawnPoolWorker-37 DEBUG    upload finished in 1.620595s

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


2025-05-26 19:26:54,388 SpawnPoolWorker-35 DEBUG    upload finished in 1.318182s, attributes: file_id=8deeebff78fd
2025-05-26 19:26:54,388 SpawnPoolWorker-35 DEBUG    upload finished in 1.318967s, attributes: file_id=8deeebff78fd
upload:  97%|█████████▋| 1023/1056 [03:09<00:06,  4.97it/s]2025-05-26 19:26:54,391 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/ac11171923a1.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:54,464 SpawnPoolWorker-36 DEBUG    upload finished in 1.344701s, attributes: file_id=b420a2e0c8ea
2025-05-26 19:26:54,464 SpawnPoolWorker-36 DEBUG    upload finished in 1.345189s

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:26:54,594 SpawnPoolWorker-39 DEBUG    upload finished in 1.421681s, attributes: file_id=26c08148f5fd
2025-05-26 19:26:54,595 SpawnPoolWorker-39 DEBUG    upload finished in 1.42215s, attributes: file_id=26c08148f5fd
2025-05-26 19:26:54,596 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/c105c82d32d2.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:55,319 SpawnPoolWorker-38 DEBUG    upload finished in 1.206099s, attributes: file_id=4bdba5812315
2025-05-26 19:26:55,319 SpawnPoolWorker-38 DEBUG    upload finished in 1.207137s, attributes: file_id=4bdba5812315
upload:  97%|█████████▋| 1029/1056 [03:10<00:04,  5.60it/s]2025-05-26 19:26:55,322 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2f2398718f82.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:55,419 SpawnPoolWorker-37 DEBUG    upload finished in 1.250654s, attributes: file_id=03f7a20aae1a
2025-05-26 19:26:55,420 SpawnPoolWorker-37 DEBUG    upload finished in 1.252131s, attributes: file_id=03f7a20aae1a
upload:  98%|█████

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:55,665 SpawnPoolWorker-35 DEBUG    upload finished in 1.274535s, attributes: file_id=ac11171923a1
2025-05-26 19:26:55,665 SpawnPoolWorker-35 DEBUG    upload finished in 1.275176s, attributes: file_id=ac11171923a1
upload:  98%|█████████▊| 1031/1056 [03:11<00:04,  5.54it/s]2025-05-26 19:26:55,668 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/2102e74838f7.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:55,743 SpawnPoolWorker-36 DEBUG    upload finished in 1.278066s, attributes: file_id=b20c3d251eba
2025-05-26 19:26:55,744 SpawnPoolWorker-36 DEBUG    upload finished in 1.27861s, attributes: file_id=b20c3d251eba
2025-05-26 19:26:55

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}
[DEBUG] get_conn

2025-05-26 19:26:55,873 SpawnPoolWorker-41 DEBUG    upload finished in 1.369405s, attributes: file_id=6a54f1cd3079
2025-05-26 19:26:55,874 SpawnPoolWorker-41 DEBUG    upload finished in 1.369992s, attributes: file_id=6a54f1cd3079
upload:  98%|█████████▊| 1036/1056 [03:11<00:01, 10.48it/s]2025-05-26 19:26:55,876 SpawnPoolWorker-41 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/fd49cff7eb69.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:56,513 SpawnPoolWorker-38 DEBUG    upload finished in 1.192294s, attributes: file_id=2f2398718f82
2025-05-26 19:26:56,514 SpawnPoolWorker-38 DEBUG    upload finished in 1.193515s, attributes: file_id=2f2398718f82
2025-05-26 19:26:56,519 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/e47a129ad5f4.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)


[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': 'Ql123456!', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:57,045 SpawnPoolWorker-37 DEBUG    upload finished in 1.623018s, attributes: file_id=c1467d8096ef
2025-05-26 19:26:57,046 SpawnPoolWorker-37 DEBUG    upload finished in 1.623733s, attributes: file_id=c1467d8096ef
upload:  98%|█████████▊| 1038/1056 [03:12<00:04,  4.27it/s]2025-05-26 19:26:57,048 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/bfc7e681bef3.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None}, inplace=True)
2025-05-26 19:26:57,137 SpawnPoolWorker-35 DEBUG    upload finished in 1.47097s, attributes: file_id=2102e74838f7
2025-05-26 19:26:57,138 SpawnPoolWorker-35 DEBUG    upload finished in 1.471741s, attributes: file_id=2102e74838f7
2025-05-26 19:26:57

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 19:26:57,276 SpawnPoolWorker-36 DEBUG    upload finished in 1.531436s, attributes: file_id=da6bf0451ff0
2025-05-26 19:26:57,277 SpawnPoolWorker-39 DEBUG    upload finished in 1.504642s, attributes: file_id=0f00ad010b68
2025-05-26 19:26:57,277 SpawnPoolWorker-36 DEBUG    upload finished in 1.532486s, attributes: file_id=da6bf0451ff0
upload:  98%|█████████▊| 1040/1056 [03:12<00:03,  5.00it/s]2025-05-26 19:26:57,277 SpawnPoolWorker-39 DEBUG    upload finished in 1.505471s, attributes: file_id=0f00ad010b68
2025-05-26 19:26:57,282 SpawnPoolWorker-39 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/9eb85e469c3e.json not detected as batch file data
2025-05-26 19:26:57,282 SpawnPoolWorker-36 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/8d4fa2b55aa2.json not detected as batch file data
2025-05-26 19:26:57,286 SpawnPoolWorker-40 DEBUG    upload finished in 1.422719s, attributes: file_id=dfdd41863870
2025-05-26 19:26:57,286 SpawnPoolWorker-40 D

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_conn

Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:57,918 SpawnPoolWorker-38 DEBUG    upload finished in 1.400548s, attributes: file_id=e47a129ad5f4
2025-05-26 19:26:57,918 SpawnPoolWorker-38 DEBUG    upload finished in 1.401464s, attributes: file_id=e47a129ad5f4
upload:  99%|█████████▉| 1045/1056 [03:13<00:01,  5.82it/s]2025-05-26 19:26:57,921 SpawnPoolWorker-38 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/4bc82a50626e.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace({np.nan: None

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


Removed trailing semicolon and whitespace from query
2025-05-26 19:26:58,466 SpawnPoolWorker-35 DEBUG    upload finished in 1.326472s, attributes: file_id=d7a1438c847c
2025-05-26 19:26:58,467 SpawnPoolWorker-37 DEBUG    upload finished in 1.41915s, attributes: file_id=bfc7e681bef3
2025-05-26 19:26:58,467 SpawnPoolWorker-35 DEBUG    upload finished in 1.327219s, attributes: file_id=d7a1438c847c
upload:  99%|█████████▉| 1047/1056 [03:13<00:01,  5.00it/s]2025-05-26 19:26:58,467 SpawnPoolWorker-37 DEBUG    upload finished in 1.419832s, attributes: file_id=bfc7e681bef3
2025-05-26 19:26:58,470 SpawnPoolWorker-37 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/93299252d6f6.json not detected as batch file data
2025-05-26 19:26:58,470 SpawnPoolWorker-35 DEBUG    /Users/liangmo/.cache/unstructured/ingest/pipeline/indexer/41117e10b408.json not detected as batch file data
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: ht

[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}
[DEBUG] get_connection connect_kwargs: {'access_config': Secret('**********'), 'schema': 'clickzetta_doc_kb', 'service': 'uat-api.clickzetta.com', 'username': 'qiliang', 'instance': 'jnsxwfyr', 'workspace': 'quick_start', 'vcluster': 'DEFAULT_AP', 'connector_type': 'clickzetta', 'password': '**********', 'paramstyle': 'qmark'}


2025-05-26 19:26:58,673 SpawnPoolWorker-40 DEBUG    upload finished in 1.385776s, attributes: file_id=d187f9625869
2025-05-26 19:26:58,674 SpawnPoolWorker-40 DEBUG    upload finished in 1.386669s, attributes: file_id=d187f9625869
2025-05-26 19:26:58,806 SpawnPoolWorker-36 DEBUG    upload finished in 1.526152s, attributes: file_id=8d4fa2b55aa2
2025-05-26 19:26:58,806 SpawnPoolWorker-36 DEBUG    upload finished in 1.52679s, attributes: file_id=8d4fa2b55aa2
upload: 100%|█████████▉| 1051/1056 [03:14<00:00,  7.18it/s]Removed trailing semicolon and whitespace from query
2025-05-26 19:26:59,004 SpawnPoolWorker-39 DEBUG    upload finished in 1.724137s, attributes: file_id=9eb85e469c3e
2025-05-26 19:26:59,005 SpawnPoolWorker-39 DEBUG    upload finished in 1.725491s, attributes: file_id=9eb85e469c3e
Removed trailing semicolon and whitespace from query
2025-05-26 19:26:59,365 SpawnPoolWorker-38 DEBUG    upload finished in 1.445125s, attributes: file_id=4bc82a50626e
2025-05-26 19:26:59,365 SpawnPo

PipelineError: Pipeline did not run successfully

In [None]:
# NOTE: every line of this cell is commented out, so nothing here executes.
# It holds a pipeline configuration kept for reference only: an S3 source
# restricted to "**/*.md", "hi_res" partitioning, "by_title" chunking, an
# embedding step, and a Clickzetta destination that writes to `raw_table_name`.
# pipeline = Pipeline.from_configs(

#     context=ProcessorConfig(
#         verbose=True,
#         tqdm=True,
#         num_processes=20,
#     ),

#     indexer_config=S3IndexerConfig(remote_url=os.getenv("AWS_S3_NAME"), recursive=True, file_glob="**/*.md" ),
#     downloader_config=S3DownloaderConfig(),
#     source_connection_config=S3ConnectionConfig(
#         access_config=S3AccessConfig(
#             key=os.getenv("AWS_KEY"),
#             secret=os.getenv("AWS_SECRET"))
#     ),

#     partitioner_config=PartitionerConfig(
#         partition_by_api=False,
#         api_key=os.getenv("UNSTRUCTURED_API_KEY"),
#         partition_endpoint=os.getenv("UNSTRUCTURED_URL"),
#         strategy="hi_res",
#         additional_partition_args={
#             "split_pdf_page": True,
#             "split_pdf_allow_failed": True,
#             "split_pdf_concurrency_level": 15
#         }
#     ),

#     chunker_config=ChunkerConfig(
#         chunking_strategy="by_title",
#         chunk_max_characters=chunk_max_characters,
#         chunk_overlap=chunk_overlap,
#         chunk_combine_text_under_n_chars=200,
#     ),

#     embedder_config=EmbedderConfig(
#         embedding_provider = embedding_provider,
#         embedding_model_name = embedding_model_name,
#     ),

#     destination_connection_config=ClickzettaConnectionConfig(
#         access_config=ClickzettaAccessConfig(password=_password),
#         username=_username,
#         service=_service,
#         instance=_instance,
#         workspace=_workspace,
#         schema=_schema,
#         vcluster=_vcluster,
#     ),
#     stager_config=ClickzettaUploadStagerConfig(),
#     uploader_config=ClickzettaUploaderConfig(table_name=raw_table_name, documents_original_source="https://yunqi.tech/documents"),
# )

# pipeline.run()

### Clean/Transform the RAW table and Insert into the Silver table

In [15]:
# You could execute more SQL statements to clean and transform the data before inserting it into the Silver table.
excute_sql(conn, clean_transformation_data_sql)

[['OPERATION SUCCEED']]

### Retrieve relevant documents from Singdata Lakehouse


In [16]:
from sentence_transformers import SentenceTransformer


# Loaded SentenceTransformer models keyed by model name, so the expensive model
# load happens once per model instead of once per query.
_EMBEDDING_MODELS = {}


def get_embedding(query):
    """Encode ``query`` with the notebook's configured embedding model.

    The model named by the notebook-level ``embedding_model_name`` is loaded on
    first use and reused on later calls; changing ``embedding_model_name``
    loads (and caches) the newly named model.

    Args:
        query: Value passed straight to ``SentenceTransformer.encode``.

    Returns:
        Whatever ``model.encode(query, normalize_embeddings=True)`` returns.
    """
    model = _EMBEDDING_MODELS.get(embedding_model_name)
    if model is None:
        model = SentenceTransformer(embedding_model_name)
        _EMBEDDING_MODELS[embedding_model_name] = model
    return model.encode(query, normalize_embeddings=True)

def retrieve_documents(conn, query: str, num_results: int = 10):
    """Return the Silver-table rows whose embeddings are closest to ``query``.

    The query is embedded with ``get_embedding`` and rows of
    ``silver_table_name`` are ranked by ``cosine_distance`` against it, smallest
    distance first.

    Args:
        conn: Open connection; ``conn.cursor()`` must work as a context manager.
        query: Text to search for.
        num_results: Maximum number of rows to return.

    Returns:
        A ``pandas.DataFrame`` with one row per hit, using the column names
        reported by the cursor.

    Raises:
        ValueError: If ``num_results`` cannot be converted to an integer.
    """
    # The statement is assembled by string formatting, so make sure only a
    # whole number can ever reach the LIMIT clause.
    limit = int(num_results)
    embedding_list = get_embedding(query).tolist()

    stmt = f"""
        WITH
        vector_embedding_result AS (
        SELECT
            "vector_embedding" as retrieve_method,
            record_locator,
            type,
            filename,
            text,
            orig_elements,
            cosine_distance(embeddings, cast({embedding_list} as vector({embeddings_dimensions}))) AS score
        FROM {silver_table_name}
        ORDER BY score ASC
        LIMIT {limit}
        )
        SELECT * FROM vector_embedding_result
        ORDER by score ASC;
    """

    with conn.cursor() as cur:
        cur.execute(stmt)
        results = cur.fetchall()
        # The first field of each description entry is the column name.
        columns = [desc[0] for desc in cur.description]

    return pd.DataFrame(results, columns=columns)

In [17]:
# Sample question, in Chinese: "What is the syntax for creating an index?"
query_text = "创建索引的语法是什么？"
# num_results is left at its default of 10; the frame is displayed as the cell output.
retrieve_documents_df = retrieve_documents(conn, query_text)
retrieve_documents_df

Unnamed: 0,retrieve_method,record_locator,type,filename,text,orig_elements,score
0,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,build-inverted-index.md,构建索引\n\n对存量数据添加索引。目前只支持向量索引和倒排索引。布隆过滤器不支持\n\n语...,eJztWNtuGzcQ/ZXtvuQltnm/OE0fggRoiiC9xWiB2BB4GT...,0.273337
1,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,create-inverted-index.md,创建倒排索引\n\n具体介绍参考倒排索引介绍\n\n语法\n\nSQL CREATE TAB...,eJztXGlvG0cS/SsEv9gBfPR9eLEfEluLFeDYXkveDWALRB...,0.294409
2,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,create-vector-index.md,创建向量索引\n\n语法\n\nSQL CREATE TABLE table_name( c...,eJztXOlv3DYW/1eI+bCwAcfD++gFdFNjG6BNuom7XaAODJ...,0.326366
3,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,create-vector-index.md,已有的表增加向量索引\n\n语法\n\nSQL CREATE VECTOR INDEX [I...,eJztmN1v2zYQwP8VQU8JkA9+f6TYQ9d5QIAi6RJ3KBAHLk...,0.331133
4,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,inverted-index.md,案例\n\nSQL CREATE TABLE inverted_index_test( id...,eJztXWuTEzcW/Su9/jJQ4aH3lciyVUmYVNgiwMKwSRVQjh...,0.340637
5,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,inverted-index.md,倒排索引\n\n【预览发布】本功能当前处于公开预览发布阶段。\n\n倒排索引原理介绍\n\n...,eJztXelvHDeW/1d69SkBkpj3YUwW8E68OwYcZzZWZrGIAo...,0.346242
6,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,CREATE-BLOOMFILTER-INDEX.md,创建BLOOMFILTER索引\n\n功能\n\n布隆过滤器（Bloom Filter）是一...,eJztW1lvG0cS/isEX5QAkdX3YSAPsSNjBchWYCvYAJJA9F...,0.355196
7,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,create-synonym.md,功能\n\n创建同义词，同义词synonym是一个数据库对象，类似给对象起一个别名。支持为以...,eJztW1lv29gV/iuEXqYPSXz3xUEf3MQDGPDSOs4UaWIodz...,0.367206
8,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,show-create-table.md,功能\n\nSHOW CREATE TABLE 命令用于获取指定表、物化视图或视图的创建语句...,eJztW21TW7kV/it3/CVkFozeX9JpZ2jibZhJYAukmXbJ2J...,0.370698
9,vector_embedding,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,create.md,功能描述\n\n创建指定类型的对象\n\n语法\n\nCREATE <object_type...,eJztmllvGzcQx7+KoJe+1DHvIygCKIrSCJAlwJLTpIkhkM...,0.375958


In [18]:
# NOTE(review): despite the name `first_row__text`, this reads the row at
# position 4 (the fifth hit), not the first one — confirm which row is intended.
first_row__text = retrieve_documents_df.iloc[4]['text']
print(first_row__text)

案例

SQL CREATE TABLE inverted_index_test( id int, name string, INDEX id_index (id) INVERTED , INDEX name_index (name) INVERTED PROPERTIES('analyzer'='keyword','mode' = 'smart|max_word') );

已有的表增加倒排索引

语法

SQL CREATE INVERTED INDEX [IF NOT EXISTS] index_name ON TABLE [schema].table_name(col_name) [COMMENT 'comment'] PROPERTIES('analyzer'='english｜chinese|keyword｜unicode','mode' = 'smart|max_word')

INVERTED: 索引类型，倒排索引

index_name: 表名字，位于schema下，schema下索引名称不能重复

col_name：列名只支持单列

PROPERTIES：指定INDEX的参数，支持的属性目前支持指定分词。数值和日期类型则不需要指定properties，如果是字符串类型要求必须指定分词

说明

执行CREATE INDEX仅对新增数据有效，对已有数据进行索引请使用BUILD INDEX命令。

案例

```SQL CREATE TABLE inverted_index_test( id int, name string );

CREATE INVERTED INDEX id_index ON TABLE public.inverted_index_test(name) PROPERTIES('analyzer'='unicode') ```

使用倒排索引查询

构建索引

对存量数据添加倒排索引

语法

SQL -- 语法 1，默认给全表的存量数据加上倒排索引 BUILD INDEX index_name ON [schema].table_name; -- 语法 2，可指定partition，可指定一个或多个,支持=, !=, >, >=, <, <= BUILD INDEX index_name ON table_name WHERE

In [19]:
# Take the file name of the best-scoring hit and pull every chunk stored for
# that file from the Silver table.
filename = retrieve_documents_df.iloc[0]['filename']

# NOTE(review): `filename` is spliced into the SQL text as-is, so a name that
# contains a double quote would break the statement — switch to bound
# parameters if the connector supports them.
stmt = f"""
    WITH
    results AS (
    SELECT
        record_locator,
        type,
        filename,
        text,
        orig_elements
    FROM {silver_table_name}
    WHERE filename = "{filename}"
    )
    SELECT * FROM results;
"""

with conn.cursor() as cur:
    cur.execute(stmt)
    results = cur.fetchall()
    # The first field of each description entry is the column name.
    columns = [desc[0] for desc in cur.description]

df = pd.DataFrame(results, columns=columns)
df

Unnamed: 0,record_locator,type,filename,text,orig_elements
0,"{""path"": ""/Users/liangmo/yunqidoc/cn_markdown_...",CompositeElement,build-inverted-index.md,构建索引\n\n对存量数据添加索引。目前只支持向量索引和倒排索引。布隆过滤器不支持\n\n语...,eJztWNtuGzcQ/ZXtvuQltnm/OE0fggRoiiC9xWiB2BB4GT...


In [20]:
# Stitch the text of every fetched chunk into one string (no separator) and print it.
chunk_texts = df["text"].astype(str)
all_text = "".join(chunk_texts)
print(all_text)

构建索引

对存量数据添加索引。目前只支持向量索引和倒排索引。布隆过滤器不支持

语法

SQL -- 语法 1，默认给全表的存量数据加上索引 BUILD INDEX index_name ON [schema].table_name; -- 语法 2，可指定partition，可指定一个或多个,支持=, !=, >, >=, <, <= BUILD INDEX index_name ON table_name WHERE partition_name1 = '1' and partition_name2 = '2';

index_name：指定要添加索引名称

支持指定分区构建：可以指定一个或者多个

说明

执行BULD INDEX是一个同步任务，执行过程会消耗计算资源。查看进度可以通过Job Profile查看。

当分区表数据量较大时，建议以分区为粒度依次创建索引。

案例

SQL BUILD INDEX bulkload_data_index ON public.bulkload_data ;


In [21]:
# Extra knowledge-base entry, in Chinese, describing how the ClickZetta, 云器
# (Yunqi) and Singdata names relate to each other.
# The first piece is a raw string so the two backslashes stay literal
# characters instead of being parsed as (invalid) escape sequences.
# NOTE(review): those backslashes also end up inside a single-quoted SQL
# literal below — confirm the engine stores them the way you expect.
kb = (
    r"ClickZetta\云器\Singdata之间的关系:"
    "ClickZetta是云器、Singdata的技术品牌。"
    "云器是ClickZetta的中国商业公司，Singdata是ClickZetta的海外商业公司。"
    "所以在不区分商业和技术的时候，三者是等同的。"
    "另外云器经常也会用到Yunqi的汉语拼音名称。"
)

# Embed the entry and convert the result to a plain list for the SQL text.
embedded_kb = get_embedding(kb).tolist()

# Target the workspace/schema loaded from the environment (`_workspace`,
# `_schema`) rather than a hard-coded location, so the insert follows the same
# connection settings as the rest of the notebook.
add_kb_sql = f"""
INSERT INTO {_workspace}.{_schema}.{silver_table_name} (
  id, type, record_id, element_id, filetype, last_modified, languages, text, embeddings, date_created, date_modified, date_processed
) VALUES (
  uuid(), 'UserInput', uuid(), uuid(), 'text', CURRENT_TIMESTAMP, '["zh-cn"]',
  '{kb}',
  CAST('{embedded_kb}' AS vector(float,{embeddings_dimensions})), CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
);
"""

In [None]:
# Run the INSERT statement prepared in `add_kb_sql`.
with conn.cursor() as kb_cursor:
    kb_cursor.execute(add_kb_sql)

: 