In [None]:
import faiss
# Vector width used to size the FAISS index. 768 is the dimensionality of the
# nomic embedding model this notebook uses; the index must be built with the
# same width as the embeddings stored in it.
d = 768
# Flat L2 FAISS index of width `d`. Note that the classification loop later in
# the notebook rebinds `faiss_index` to a fresh index for every row.
faiss_index = faiss.IndexFlatL2(d)

In [None]:
import os
import openai
# Route OpenAI-style API traffic to a server on localhost instead of the hosted
# OpenAI service. The key is a placeholder rather than a real credential (its
# value suggests an LM Studio server -- confirm against your local setup).
os.environ.update(
    OPENAI_API_KEY='lm-studio',
    OPENAI_API_BASE='http://localhost:1234/v1',
)

# Mirror the same two settings onto the openai module itself, reading them back
# from the environment so both places always agree.
openai.api_key = os.environ.get('OPENAI_API_KEY')
openai.api_base = os.environ.get('OPENAI_API_BASE')

In [None]:
from llama_index import Document
import pandas as pd
from llama_index import VectorStoreIndex, StorageContext
from llama_index.vector_stores.faiss import FaissVectorStore

# Classify every incident narrative in the input CSV with the configured LLM
# and write the labels to a new CSV in the 'LLM Output' column.
#
# Each row gets its own throwaway FAISS index that holds only that row's
# narrative, so the query engine can only retrieve text from the row being
# classified.

# Load CSV data. Forward slashes are used so the relative path resolves on
# Windows, macOS and Linux alike (a backslash path only works on Windows).
# Undecodable bytes are replaced instead of aborting the load.
with open('GPT/data/gpt_data.csv', 'r', encoding='utf-8', errors='replace') as file:
    df = pd.read_csv(file)

# The prompt is identical for every row, so it is built once, outside the loop.
# The text (including its indentation) is sent to the model verbatim.
query = """Your task is to classify each news article into one of the following categories. The output should only contain the category name, with no explanation and other output: 
    
    Unlawful detention
    Human trafficking
    Enslavement
    Willful killing of civilians
    Mass execution
    Kidnapping
    Extrajudicial killing
    Forced disappearance
    Damage or destruction of civilian critical infrastructure
    Damage or destruction, looting, or theft of cultural heritage
    Military operations (battle, shelling)
    Gender-based or other conflict-related sexual violence
    Violent crackdowns on protesters/opponents/civil rights abuse
    Indiscriminate use of weapons
    Torture or indications of torture
    Persecution based on political, racial, ethnic, gender, or sexual orientation
    Movement of military, paramilitary, or other troops and equipment
    
    Classify each news article into one of the above categories. Note that a single article can be classified into only one category based on the most prominent theme. If an article fits into multiple categories, select the one that best describes the primary issue."""

df['LLM Output'] = None
try:
    for i, row in df.iterrows():
        narrative = row['Incident Narrative']
        # A missing cell would otherwise be stringified to 'nan' and classified
        # as if it were an article; leave such rows unlabeled instead.
        if pd.isna(narrative) or not str(narrative).strip():
            print(f"{i} skipped: empty narrative")
            continue

        text = str(narrative)
        text_list = [text]
        documents = [Document(text=t) for t in text_list]

        # Fresh index per row: nothing from previous rows can be retrieved.
        faiss_index = faiss.IndexFlatL2(d)
        vector_store = FaissVectorStore(faiss_index=faiss_index)
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context, show_progress=True)

        response = index.as_query_engine(similarity_top_k=1).query(query)
        article_metadata_detected_str = response.response

        # f-string rather than '+' so a None response text cannot raise TypeError.
        print(f"{i} {article_metadata_detected_str}")
        df.at[i, 'LLM Output'] = article_metadata_detected_str
finally:
    # Always persist whatever has been labeled so far: if a request fails
    # part-way through, the completed rows are not lost. The original
    # exception, if any, still propagates after the file is written.
    df.to_csv('Sudan_Conflict_SLC_noDef.csv', index=False, encoding='utf-8')