In [None]:
import pandas as pd
from openai import OpenAI
from langchain.prompts import PromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

# Shared chat client pointed at http://localhost:1234/v1 (presumably a local
# LM Studio server -- confirm); the API key is the fixed string "lm-studio".
client = OpenAI(
    api_key="lm-studio",
    base_url="http://localhost:1234/v1",
)

# Function to call OpenAI API
def get_completion(prompt):
    """Return the model's full reply to `prompt` as a single string.

    The request goes through the module-level `client` with a fixed system
    message, `temperature=0`, and streaming enabled. The streamed text
    fragments are concatenated in arrival order.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The concatenated reply text, or an empty string when no streamed
        chunk carried any text.
    """
    history = [
        {"role": "system", "content": "You are a natural language processing model used to perform basic NLP tasks."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="model-identifier",
        messages=history,
        temperature=0,
        stream=True,
    )

    fragments = []
    for chunk in completion:
        # Some streamed chunks may carry no choices at all; nothing to read.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        # A delta without text has content None. Converting it with str()
        # would append the literal word "None" to the reply, so skip it.
        if text:
            fragments.append(text)
    return "".join(fragments)

# Response fields the model is asked to produce: the event date and the
# event's geographic location.
Date = ResponseSchema(
    name="Date",
    description=(
        "Please identify the date of the event mentioned in the article. "
        "Provide the date in the format: 'MM/DD/YYYY'."
    ),
)
Geographic = ResponseSchema(
    name="Geographic Location",
    description=(
        "Please determine the geographic location of the event described in the article. "
        "Specify the location in the format 'state, Country', for example, 'El Fasher, Sudan'."
    ),
)

# Parser built from both fields; its format instructions are the text that
# fills the {format_instructions} slot of the prompt.
article_metadata_output_schema_parser = StructuredOutputParser.from_response_schemas(
    [Date, Geographic]
)
article_metadata_output_schema = (
    article_metadata_output_schema_parser.get_format_instructions()
)

# Prompt text, written line by line so the exact whitespace (including the
# trailing space after "below.") is explicit.
article_metadata_prompt_template_str = (
    "\n"
    "Given in input a full news article that describes conflicts in Sudan, "
    "extract the following metadata according to the format instructions below. \n"
    "<< FORMATTING >>\n"
    "{format_instructions}\n"
    "<< INPUT >>\n"
    "{news_article}\n"
    "<< OUTPUT>>\n"
)

article_metadata_prompt_template = PromptTemplate.from_template(
    template=article_metadata_prompt_template_str,
)

# Read the incident log that will be annotated
df = pd.read_csv('Sudan_Incident_Log_May.csv', encoding='utf-8')

# Placeholder column; each row receives the model's raw reply below
df['Geo_Meta_data'] = None

# Query the model once per incident row
for row_label, incident in df.iterrows():
    # Article text = the row's Date value followed by its Incident Narrative
    content = "{} {}".format(incident['Date'], incident['Incident Narrative'])

    # Fill the prompt template, send it, and store the unparsed reply string
    df.at[row_label, 'Geo_Meta_data'] = get_completion(
        article_metadata_prompt_template.format(
            format_instructions=article_metadata_output_schema,
            news_article=content,
        )
    )


# Write the annotated table once every row has been processed
df.to_csv(
    'Sudan_Incident_Log_May_instruction.csv',
    encoding='utf-8-sig',
    index=False,
)
