In [None]:
import logging
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
import numpy as np

# Configure logging
# (basicConfig does nothing when the root logger already has handlers, so
# re-running this cell in the same kernel is harmless.)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# Dense-vector smoke test: create a collection, index it, insert 100 random
# vectors, read one row back by id, run a top-3 similarity search, then drop
# the collection. Any failure is logged with its traceback and re-raised.
collection_name = "dense_test"  # single source of truth for create + drop
try:
    # 1. Connect to Milvus
    logging.info("Connecting to Milvus server...")
    connections.connect(host='127.0.0.1', port='19530')
    logging.info("Successfully connected to Milvus")

    # A previous run that failed before its cleanup step leaves the collection
    # behind. Remove it first so this run starts from an empty collection
    # instead of inserting ids 0..99 a second time into the old one.
    if utility.has_collection(collection_name):
        logging.info("Dropping leftover collection from a previous run...")
        utility.drop_collection(collection_name)

    # 2. Create dense vector collection
    logging.info("Creating collection...")
    dim = 768
    metric_type = "L2"  # or "IP"
    collection = Collection(
        collection_name,
        CollectionSchema([
            FieldSchema("id", DataType.INT64, is_primary=True),
            FieldSchema("vector", DataType.FLOAT_VECTOR, dim=dim)
        ]),
        consistency_level="Strong"
    )
    logging.info(f"Collection created: {collection.name}")

    # 3. Create index and load
    logging.info("Creating index...")
    collection.create_index(
        "vector",
        {"index_type": "IVF_FLAT", "metric_type": metric_type, "params": {"nlist": 16}}
    )
    collection.load()
    logging.info("Index created and collection loaded")

    # 4. Insert dense vectors (column order matches the schema: ids, vectors)
    logging.info("Generating and inserting vectors...")
    vectors = np.random.randn(100, dim).astype(np.float32)
    collection.insert([list(range(100)), vectors.tolist()])
    logging.info(f"Inserted {len(vectors)} vectors")

    # 5. Verify Query
    logging.info("Running query...")
    res = collection.query(expr="id == 0", output_fields=["vector"])
    logging.info("Query results:")
    print(f"Query returned {len(res)} results:")
    for idx, result in enumerate(res):
        print(f"Result {idx}: ID={result['id']}, Vector={result['vector'][:5]}...")  # Show first 5 elements

    # 6. Verify Search (the query vector is row 0 of the inserted data)
    logging.info("Running search...")
    search_result = collection.search(
        vectors[:1].tolist(),
        "vector",
        {"metric_type": metric_type, "params": {"nprobe": 10}},
        limit=3,
        output_fields=["vector"]
    )

    logging.info("Search results:")
    print(f"\nSearch returned {len(search_result[0])} results:")
    for idx, hit in enumerate(search_result[0]):
        print(f"Rank {idx+1}: ID={hit.id}, Distance={hit.distance:.4f}")
        print(f"Vector: {hit.entity.fields['vector'][:5]}...\n")

    # Cleanup
    logging.info("Cleaning up...")
    utility.drop_collection(collection_name)
    logging.info("Collection dropped successfully")

except Exception as e:
    # Log with the full traceback, then re-raise so the cell visibly fails.
    logging.error(f"Error occurred: {str(e)}", exc_info=True)
    raise

In [None]:
import logging
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
import numpy as np
from scipy.sparse import random as sparse_random

# Configure logging
# (basicConfig does nothing when the root logger already has handlers, so
# re-running this cell in the same kernel is harmless.)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# NOTE(review): despite the collection name "sparse_test" (and the
# scipy.sparse import at the top of this cell), this test builds a dense
# FLOAT_VECTOR field with an IVF_FLAT index, so it does not exercise sparse
# vectors at all. TODO: confirm whether a real sparse-vector test was
# intended here and, if so, replace the schema/index/data accordingly.
collection_name = "sparse_test"  # single source of truth for create + drop
try:
    # 1. Connect to Milvus
    logging.info("Connecting to Milvus server...")
    connections.connect(host='127.0.0.1', port='19530')
    logging.info("Successfully connected to Milvus")

    # A previous run that failed before its cleanup step leaves the collection
    # behind. Remove it first so this run starts from an empty collection
    # instead of inserting ids 0..99 a second time into the old one.
    if utility.has_collection(collection_name):
        logging.info("Dropping leftover collection from a previous run...")
        utility.drop_collection(collection_name)

    # 2. Create dense vector collection
    logging.info("Creating collection...")
    dim = 768
    metric_type = "L2"  # or "IP"
    collection = Collection(
        collection_name,
        CollectionSchema([
            FieldSchema("id", DataType.INT64, is_primary=True),
            FieldSchema("vector", DataType.FLOAT_VECTOR, dim=dim)
        ]),
        consistency_level="Strong"
    )
    logging.info(f"Collection created: {collection.name}")

    # 3. Create index and load
    logging.info("Creating index...")
    collection.create_index(
        "vector",
        {"index_type": "IVF_FLAT", "metric_type": metric_type, "params": {"nlist": 16}}
    )
    collection.load()
    logging.info("Index created and collection loaded")

    # 4. Insert dense vectors (column order matches the schema: ids, vectors)
    logging.info("Generating and inserting vectors...")
    vectors = np.random.randn(100, dim).astype(np.float32)
    collection.insert([list(range(100)), vectors.tolist()])
    logging.info(f"Inserted {len(vectors)} vectors")

    # 5. Verify Query
    logging.info("Running query...")
    res = collection.query(expr="id == 0", output_fields=["vector"])
    logging.info("Query results:")
    print(f"Query returned {len(res)} results:")
    for idx, result in enumerate(res):
        print(f"Result {idx}: ID={result['id']}, Vector={result['vector'][:5]}...")  # Show first 5 elements

    # 6. Verify Search (the query vector is row 0 of the inserted data)
    logging.info("Running search...")
    search_result = collection.search(
        vectors[:1].tolist(),
        "vector",
        {"metric_type": metric_type, "params": {"nprobe": 10}},
        limit=3,
        output_fields=["vector"]
    )

    logging.info("Search results:")
    print(f"\nSearch returned {len(search_result[0])} results:")
    for idx, hit in enumerate(search_result[0]):
        print(f"Rank {idx+1}: ID={hit.id}, Distance={hit.distance:.4f}")
        print(f"Vector: {hit.entity.fields['vector'][:5]}...\n")

    # Cleanup
    logging.info("Cleaning up...")
    utility.drop_collection(collection_name)
    logging.info("Collection dropped successfully")

except Exception as e:
    # Log with the full traceback, then re-raise so the cell visibly fails.
    logging.error(f"Error occurred: {str(e)}", exc_info=True)
    raise

In [None]:
import logging
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
import numpy as np
from scipy.sparse import random as sparse_random

# Configure logging
# (basicConfig does nothing when the root logger already has handlers, so
# re-running this cell in the same kernel is harmless.)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# Dimension sweep: for each vector dimension, create a fresh collection,
# index it, insert 100 random vectors, query and search, then drop it.
# Any failure is logged with its traceback and re-raised, aborting the sweep.
collection_name = "dim_test"  # single source of truth for create + drop
try:
    # Connect to Milvus
    logging.info("Connecting to Milvus server...")
    connections.connect(host='127.0.0.1', port='19530')
    logging.info("Successfully connected to Milvus")

    # Test multiple dimensions
    dimensions = [2, 64, 256, 1024, 4096]
    metric_type = "L2"  # or "IP"

    for dim in dimensions:
        logging.info(f"\n{'='*30} Testing Dimension: {dim} {'='*30}")

        # The schema differs per iteration, so a collection left over from an
        # earlier failed run (or iteration) must be removed before creating
        # the one for this dimension.
        if utility.has_collection(collection_name):
            logging.info("Dropping leftover collection from a previous run...")
            utility.drop_collection(collection_name)

        # Create dense vector collection.
        # `dim` is the loop variable; it must not be reassigned here, or every
        # iteration would test the same dimension.
        logging.info("Creating collection...")
        collection = Collection(
            collection_name,
            CollectionSchema([
                FieldSchema("id", DataType.INT64, is_primary=True),
                FieldSchema("vector", DataType.FLOAT_VECTOR, dim=dim)
            ]),
            consistency_level="Strong"
        )
        logging.info(f"Collection created: {collection.name}")

        # Create index and load
        logging.info("Creating index...")
        collection.create_index(
            "vector",
            {"index_type": "IVF_FLAT", "metric_type": metric_type, "params": {"nlist": 16}}
        )
        collection.load()
        logging.info("Index created and collection loaded")

        # Insert dense vectors (column order matches the schema: ids, vectors)
        logging.info("Generating and inserting vectors...")
        vectors = np.random.randn(100, dim).astype(np.float32)
        collection.insert([list(range(100)), vectors.tolist()])
        logging.info(f"Inserted {len(vectors)} vectors")

        # Verify Query
        logging.info("Running query...")
        res = collection.query(expr="id == 0", output_fields=["vector"])
        logging.info("Query results:")
        print(f"Query returned {len(res)} results:")
        for idx, result in enumerate(res):
            print(f"Result {idx}: ID={result['id']}, Vector={result['vector'][:5]}...")  # Show first 5 elements

        # Verify Search (the query vector is row 0 of the inserted data)
        logging.info("Running search...")
        search_result = collection.search(
            vectors[:1].tolist(),
            "vector",
            {"metric_type": metric_type, "params": {"nprobe": 10}},
            limit=3,
            output_fields=["vector"]
        )

        logging.info("Search results:")
        print(f"\nSearch returned {len(search_result[0])} results:")
        for idx, hit in enumerate(search_result[0]):
            print(f"Rank {idx+1}: ID={hit.id}, Distance={hit.distance:.4f}")
            print(f"Vector: {hit.entity.fields['vector'][:5]}...\n")

        # Cleanup (per iteration, so the next dimension gets a new schema)
        logging.info("Cleaning up...")
        utility.drop_collection(collection_name)
        logging.info("Collection dropped successfully")

except Exception as e:
    # Log with the full traceback, then re-raise so the cell visibly fails.
    logging.error(f"Error occurred: {str(e)}", exc_info=True)
    raise

In [None]:
import logging
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
import numpy as np

# Configure logging
# (basicConfig does nothing when the root logger already has handlers, so
# re-running this cell in the same kernel is harmless.)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# Scalar-field smoke test: a collection with a vector plus VARCHAR / FLOAT /
# INT32 / BOOL / INT64 columns, filled with 100 rows and queried with a
# scalar filter expression. Any failure is logged and re-raised.
collection_name = "scalar_test"  # single source of truth for create + drop
try:
    # 1. Connect to Milvus
    logging.info("Connecting to Milvus server...")
    connections.connect(host='127.0.0.1', port='19530')
    logging.info("Successfully connected to Milvus")

    # A previous run that failed before its cleanup step leaves the collection
    # behind. Remove it first so this run starts from an empty collection
    # instead of inserting ids 0..99 a second time into the old one.
    if utility.has_collection(collection_name):
        logging.info("Dropping leftover collection from a previous run...")
        utility.drop_collection(collection_name)

    # 2. Create dense vector collection
    logging.info("Creating collection...")
    dim = 768
    metric_type = "L2"  # or "IP"
    collection = Collection(
        collection_name,
        CollectionSchema([
            FieldSchema("id", DataType.INT64, is_primary=True),
            FieldSchema("vector", DataType.FLOAT_VECTOR, dim=dim),
            FieldSchema("str_field", DataType.VARCHAR, max_length=200),
            FieldSchema("float_field", DataType.FLOAT),
            FieldSchema("int_field", DataType.INT32),
            FieldSchema("bool_field", DataType.BOOL),
            FieldSchema("ts_field", DataType.INT64)  # Timestamp stored as INT64
        ]),
        consistency_level="Strong"
    )
    logging.info(f"Collection created: {collection.name}")

    # 3. Create index and load
    logging.info("Creating index...")
    collection.create_index(
        "vector",
        {"index_type": "IVF_FLAT", "metric_type": metric_type, "params": {"nlist": 16}}
    )
    collection.load()
    logging.info("Index created and collection loaded")

    # 4. Insert dense vectors
    logging.info("Generating and inserting vectors...")
    vectors = np.random.randn(100, dim).astype(np.float32)
    # One list per schema field, in schema order. Every column is converted to
    # plain Python values (.tolist()) so the insert does not rely on the client
    # accepting NumPy arrays / NumPy scalar types.
    data = [
        list(range(100)),  # IDs
        vectors.tolist(),  # Vectors
        [f"str_{i}" + "a"*(i%10) for i in range(100)],  # VARCHAR
        np.random.rand(100).tolist(),  # FLOAT values
        np.random.randint(0, 10000, 100).tolist(),  # INT32 (Python ints)
        [bool(i%2) for i in range(100)],  # BOOL
        [int(1672531200 + i*3600) for i in range(100)]  # INT64 timestamps (hourly)
    ]
    collection.insert(data)
    logging.info(f"Inserted {len(vectors)} vectors")

    # 5. Verify Query
    logging.info("Querying with scalar conditions...")
    res = collection.query(
        # Alternative filters kept for manual experimentation:
        # expr="id == 0",
        # expr="bool_field == True",
        expr="float_field > 0.5 and bool_field == True",
        # expr="str_field like 'str_1%' and float_field > 0.5 and bool_field == True",
        output_fields=["*"]
    )
    print("\nScalar query results:")
    for r in res[:3]:  # Print first 3 results
        print(f"ID:{r['id']} | str:{r['str_field']} | float:{r['float_field']:.2f} | "
              f"int:{r['int_field']} | bool:{r['bool_field']} | ts:{r['ts_field']}")

    # Cleanup
    logging.info("Cleaning up...")
    utility.drop_collection(collection_name)
    logging.info("Collection dropped successfully")

except Exception as e:
    # Log with the full traceback, then re-raise so the cell visibly fails.
    logging.error(f"Error occurred: {str(e)}", exc_info=True)
    raise

In [None]:
import logging
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
import numpy as np

# Configure logging
# (basicConfig does nothing when the root logger already has handlers, so
# re-running this cell in the same kernel is harmless.)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# FLOAT_VECTOR smoke test: create a collection, index it, insert 100 random
# float32 vectors, read one row back by id, run a top-3 search, then drop the
# collection. Any failure is logged with its traceback and re-raised.
collection_name = "float_vector_test"  # single source of truth for create + drop
try:
    # 1. Connect to Milvus
    logging.info("Connecting to Milvus server...")
    connections.connect(host='127.0.0.1', port='19530')
    logging.info("Successfully connected to Milvus")

    # A previous run that failed before its cleanup step leaves the collection
    # behind. Remove it first so this run starts from an empty collection
    # instead of inserting ids 0..99 a second time into the old one.
    if utility.has_collection(collection_name):
        logging.info("Dropping leftover collection from a previous run...")
        utility.drop_collection(collection_name)

    # 2. Create dense vector collection
    logging.info("Creating collection...")
    dim = 768
    metric_type = "L2"  # or "IP"
    collection = Collection(
        collection_name,
        CollectionSchema([
            FieldSchema("id", DataType.INT64, is_primary=True),
            FieldSchema("vector", DataType.FLOAT_VECTOR, dim=dim)
        ]),
        consistency_level="Strong"
    )
    logging.info(f"Collection created: {collection.name}")

    # 3. Create index and load
    logging.info("Creating index...")
    collection.create_index(
        "vector",
        {"index_type": "IVF_FLAT", "metric_type": metric_type, "params": {"nlist": 16}}
    )
    collection.load()
    logging.info("Index created and collection loaded")

    # 4. Insert dense vectors (column order matches the schema: ids, vectors)
    logging.info("Generating and inserting vectors...")
    vectors = np.random.randn(100, dim).astype(np.float32)
    collection.insert([list(range(100)), vectors.tolist()])
    logging.info(f"Inserted {len(vectors)} vectors")

    # 5. Verify Query
    logging.info("Running query...")
    res = collection.query(expr="id == 0", output_fields=["vector"])
    logging.info("Query results:")
    print(f"Query returned {len(res)} results:")
    for idx, result in enumerate(res):
        print(f"Result {idx}: ID={result['id']}, Vector={result['vector'][:5]}...")  # Show first 5 elements

    # 6. Verify Search (the query vector is row 0 of the inserted data)
    logging.info("Running search...")
    search_result = collection.search(
        vectors[:1].tolist(),
        "vector",
        {"metric_type": metric_type, "params": {"nprobe": 10}},
        limit=3,
        output_fields=["vector"]
    )

    logging.info("Search results:")
    print(f"\nSearch returned {len(search_result[0])} results:")
    for idx, hit in enumerate(search_result[0]):
        print(f"Rank {idx+1}: ID={hit.id}, Distance={hit.distance:.4f}")
        print(f"Vector: {hit.entity.fields['vector'][:5]}...\n")

    # Cleanup
    logging.info("Cleaning up...")
    utility.drop_collection(collection_name)
    logging.info("Collection dropped successfully")

except Exception as e:
    # Log with the full traceback, then re-raise so the cell visibly fails.
    logging.error(f"Error occurred: {str(e)}", exc_info=True)
    raise

In [None]:
import logging
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
import numpy as np

# Configure logging
# (basicConfig does nothing when the root logger already has handlers, so
# re-running this cell in the same kernel is harmless.)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# JSON-field smoke test: a collection with a vector plus a JSON "metadata"
# column, filled with 100 rows, filtered on nested JSON keys, and searched
# with the JSON field returned alongside each hit. Any failure is logged with
# its traceback and re-raised.
collection_name = "json_test"  # single source of truth for create + drop
try:
    # 1. Connect to Milvus
    logging.info("Connecting to Milvus server...")
    connections.connect(host='127.0.0.1', port='19530')
    logging.info("Successfully connected to Milvus")

    # Start from a clean slate. Guarded with has_collection so this step does
    # not depend on how the server treats dropping a missing collection.
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

    # 2. Create dense vector collection
    logging.info("Creating collection...")
    dim = 768
    metric_type = "L2"  # or "IP"
    collection = Collection(
        collection_name,
        CollectionSchema([
            FieldSchema("id", DataType.INT64, is_primary=True),
            FieldSchema("vector", DataType.FLOAT_VECTOR, dim=dim),
            FieldSchema("metadata", DataType.JSON)
        ]),
        consistency_level="Strong"
    )
    logging.info(f"Collection created: {collection.name}")

    # 3. Create index and load
    logging.info("Creating index...")
    collection.create_index(
        "vector",
        {"index_type": "IVF_FLAT", "metric_type": metric_type, "params": {"nlist": 16}}
    )
    collection.load()
    logging.info("Index created and collection loaded")

    # 4. Insert dense vectors
    logging.info("Generating and inserting vectors...")
    vectors = np.random.randn(100, dim).astype(np.float32)
    # One list per schema field, in schema order: ids, vectors, metadata.
    data = [
        list(range(100)),
        vectors.tolist(),
        [{
            "title": f"doc_{i}",
            "tags": ["tag1", "tag2"] if i%2 else ["tag3"],
            "stats": {"views": i*10, "rating": round(np.random.uniform(1, 5), 1)}
        } for i in range(100)]
    ]
    collection.insert(data)

    logging.info(f"Inserted {len(vectors)} vectors")

    # 5. Verify JSON query (filters on a top-level key and a nested key)
    logging.info("Querying JSON data...")
    res = collection.query(
        expr="metadata['title'] == 'doc_0' and metadata['stats']['rating'] > 0",
        output_fields=["metadata", "id"]
    )
    print("\nJSON query results:")
    for r in res:
        print(f"ID:{r['id']} | Metadata:{r['metadata']}")

    # 6. Verify JSON in search results
    search_result = collection.search(
        vectors[:1].tolist(),  # Use first vector
        "vector",
        # Use the same metric the index was built with.
        {"metric_type": metric_type, "params": {"nprobe": 10}},
        limit=3,
        output_fields=["metadata"]
    )
    print("\nJSON in search results:")
    for hit in search_result[0]:
        print(f"ID:{hit.id} | Metadata:{hit.entity.fields['metadata']}")

    # Cleanup: drop the collection this test created.
    logging.info("Cleaning up...")
    utility.drop_collection(collection_name)
    logging.info("Collection dropped successfully")

except Exception as e:
    # Log with the full traceback, then re-raise so the cell visibly fails.
    logging.error(f"Error occurred: {str(e)}", exc_info=True)
    raise