# loadShooterDesc Function

In [13]:
import os

def loadShooterDesc(directory_path):
    """Load shooter descriptions from every ``.txt`` file in a directory.

    Each file is read as UTF-8 text and split on the literal separator
    ``=====``. Every section is stripped of surrounding whitespace, and
    sections that are empty after stripping are dropped so that leading,
    trailing or doubled separators do not produce blank entries.

    Args:
        directory_path: Directory to scan. Sub-directories are not searched.

    Returns:
        A dict mapping each file name (without its extension) to the list of
        non-empty, stripped sections found in that file. Files are visited in
        sorted name order so the result is reproducible between runs.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    shooter_desc = {}

    # os.listdir() returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith(".txt"):  # descriptions live in .txt files
            continue

        file_path = os.path.join(directory_path, filename)
        if not os.path.isfile(file_path):
            # A directory that happens to be named "*.txt" cannot be opened.
            continue

        # Explicit encoding: the default depends on the platform locale.
        with open(file_path, encoding="utf-8") as f:
            raw_shooter_desc = f.read()

        filename_without_ext = os.path.splitext(filename)[0]  # remove .txt extension
        stripped_sections = (text.strip() for text in raw_shooter_desc.split('====='))
        shooter_desc[filename_without_ext] = [text for text in stripped_sections if text]

    return shooter_desc


In [14]:
# Read every description file found in the current working directory.
shooter_desc = loadShooterDesc(".")

# Echo the mapping so the cell output shows what was loaded.
shooter_desc

{'shooter_desc': ["It's a guy.\nWearing black jacket\nhas a gun"]}

In [15]:
# Flatten the {name: [sections]} mapping into one record per section. The
# name is prefixed onto the text so it becomes part of the stored document.
docs = [
    {'text': ' | '.join((name, part)), 'path': name}
    for name, parts in shooter_desc.items()
    for part in parts
]

# Peek at the first records
docs[:2]

[{'text': "shooter_desc | It's a guy.\nWearing black jacket\nhas a gun",
  'path': 'shooter_desc'}]

In [16]:
import getpass
import os

# Connection settings are taken from the environment so that no secret is
# stored in the notebook. The user name and DSN fall back to the local
# development defaults when the variables are not set.
un = os.environ.get("ORACLE_USER", "user1")
cs = os.environ.get("ORACLE_DSN", "localhost/FREEPDB1")

# The password is never hard-coded: use ORACLE_PASSWORD when it is set,
# otherwise prompt for it without echoing the input.
# NOTE(review): a password was previously committed in this cell; it should be
# rotated on the database side.
pw = os.environ.get("ORACLE_PASSWORD") or getpass.getpass(f"Oracle password for {un}: ")

In [17]:
import oracledb
# Open the database session from the un / pw / cs settings defined above.
connection = oracledb.connect(
    user=un,
    password=pw,
    dsn=cs,
)

In [18]:
table_name = 'shooter_desc'

with connection.cursor() as cursor:
    # Table layout: a numeric primary key, the document payload stored as a
    # CLOB that must hold valid JSON, and the embedding column.
    # IF NOT EXISTS lets this cell be re-run against an existing table.
    # table_name is a constant set in this cell, so interpolating it into the
    # statement is safe (identifiers cannot be passed as bind variables).
    create_table_sql = f"""
        CREATE TABLE IF NOT EXISTS {table_name} (
            id NUMBER PRIMARY KEY,
            payload CLOB CHECK (payload IS JSON),
            vector VECTOR
        )"""

    # Errors are deliberately not caught: an oracledb.DatabaseError from a
    # failed DDL statement propagates as-is and stops the notebook.
    cursor.execute(create_table_sql)

    # From here on every statement on this connection is committed
    # automatically.
    connection.autocommit = True

In [19]:
from sentence_transformers import SentenceTransformer

# Instantiate the sentence-embedding model by name.
encoder = SentenceTransformer("all-MiniLM-L12-v2")

In [20]:
import array

# Wrap each document in a record carrying its row id, the text to embed and
# the original document as the payload to store.
data = [
    dict(id=position, vector_source=doc['text'], payload=doc)
    for position, doc in enumerate(docs)
]

# Gather the text of every record so the model can encode them in one call.
texts = [str(record['vector_source']) for record in data]

# Embed all texts in batches of 32, showing a progress bar.
embeddings = encoder.encode(texts, batch_size=32, show_progress_bar=True)

# Attach each embedding to its record as a typed array of floats ("f").
for row, embedding in zip(data, embeddings):
    row.update(vector=array.array("f", embedding))

Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 33.99it/s]


In [21]:
import json

with connection.cursor() as cursor:
    # Empty the table first so that re-running the cell does not collide with
    # ids inserted by a previous run.
    cursor.execute(f"truncate table {table_name}")

    # One bind tuple per record: (id, JSON-encoded payload, vector).
    prepared_data = [
        (entry['id'], json.dumps(entry['payload']), entry['vector'])
        for entry in data
    ]

    # Insert all records using positional bind variables.
    cursor.executemany(
        f"""INSERT INTO {table_name} (id, payload, vector)
            VALUES (:1, :2, :3)""", prepared_data)

    connection.commit()

In [22]:
with connection.cursor() as cursor:
    # Read back every column of every stored row.
    query = f"SELECT * FROM {table_name}"
    cursor.execute(query)
    rows = cursor.fetchall()

    # Show at most the first five rows as a sanity check.
    for row in rows[:5]:
        print(row)

(0, {'text': "shooter_desc | It's a guy.\nWearing black jacket\nhas a gun", 'path': 'shooter_desc'}, array('f', [-0.051288604736328125, 0.049132585525512695, -0.06931642442941666, -0.04338255524635315, 0.12153938412666321, -0.03359587863087654, 0.1437259167432785, -0.06074311211705208, -0.06812921166419983, 0.047271352261304855, 0.03384087607264519, -0.06134788691997528, 0.022374490275979042, -0.04195171222090721, 0.02387419156730175, -0.021798118948936462, -0.03255593776702881, 0.005038491915911436, 0.04417942836880684, -0.09163067489862442, -0.06282755732536316, -0.014342972077429295, -0.015006693080067635, 0.014726278372108936, -0.03752782195806503, -0.0824655219912529, 0.06621198356151581, 0.038732822984457016, -0.06109132990241051, 0.0001672468351898715, 0.11586898565292358, -0.033000119030475616, -0.02535323239862919, 0.04111591354012489, -0.04112708941102028, -0.02353784441947937, 0.019922804087400436, 0.03384121507406235, -0.06683097034692764, 0.029551811516284943, -0.092323757