In [11]:
import pandas as pd
import openai
from llama_index.core import VectorStoreIndex, Document
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core import StorageContext
import faiss
import requests
import os

In [12]:
# Route every OpenAI-compatible client to the local LM Studio server.
os.environ.update({
    "OPENAI_API_KEY": 'lm-studio',
    'OPENAI_API_BASE': 'http://localhost:1234/v1',
})

# Mirror the environment settings onto the openai module itself.
openai.api_key = os.environ["OPENAI_API_KEY"]
openai.api_base = os.environ["OPENAI_API_BASE"]

# Dimensionality handed to the flat L2 FAISS index.
# NOTE(review): presumably this has to equal the embedding model's vector size — confirm.
d = 768
faiss_index = faiss.IndexFlatL2(d)

# Article text: the raw news report that the following cells index and query.
# The literal opens and closes on its own lines, so the value starts and ends
# with a newline character.
article_text = """
January 13, 2025 El-Fasher - Darfur 24 Five militants were killed on Monday in a shootout between Military Police and armed persons in Colombia market within Abu-Shouk camp for displaced persons in El-Fasher, the Capital of North Darfur. Abu-Shouk camp market is witnessing a fierce campaign these days by Military Police in coordination with the Intelligence of the Joint Forces of Armed Movements to combat military phenomena, drug and arms trafficking. Eyewitnesses told 'Darfur 24' that 'Military Police and the Intelligence of Joint Forces continued their raid on Colombia market for the third consecutive day as part of their efforts to combat military phenomena, drug trafficking and arms sales in Abu-Shouk camp.' The witnesses reported that the raid was accompanied by gunfire between Military Police and the militants, which resulted in the death of 5 militants and the injury of army soldiers and fighters from the Joint Forces, and dozens of militants were arrested. An intelligence source revealed to 'Darfur 24' that the fight against the military presence, sale of weapons and drugs by militants began on Saturday and is still ongoing. He confirmed the involvement of soldiers in the Army and the Joint forces in drug, weapons trade and the looting of citizens’ property. Last week, displaced people in Abu-Shouk camp denounced the presence of military manifestations in camp market, before they made an explicit accusation against the Army of militarizing the camp, which prompted Rapid Support Forces to directly target the displaced, resulting in dozens of deaths and injuries.
"""

# Wrap the article in a one-row frame (single 'Text' column) and write the seed CSV.
data = dict(Text=[article_text])
df = pd.DataFrame(data)
df.to_csv(path_or_buf='automap.csv', encoding='utf-8-sig', index=False)


In [13]:
# Initialize a column for metadata
df['Geo_Meta_data'] = None

# Wrap the article in a single llama-index Document.
text = article_text
text_list = [text]
print("Processing text:", text_list)
documents = [Document(text=t) for t in text_list]

# Set up the FAISS vector store and index the document in it
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context, show_progress=True)

# Query to extract geographic metadata (one "location | date" line per incident)
query = """Identify the geographic location(s) and the time(s) where the incident(s) occurred in the format of "neighborhood, state, country | mm/dd/yyyy". If there are multiple locations, list each location with its corresponding date on a separate line without any explanation."""
response = index.as_query_engine(similarity_top_k=1).query(query)

# Split the response into individual lines and create a DataFrame.
# Blank lines are dropped so they do not become empty rows that would later be
# validated and geocoded; a missing (None) answer yields an empty frame instead
# of raising on .strip().
raw_answer = response.response or ""
lines = [line.strip() for line in raw_answer.splitlines() if line.strip()]
metadata_df = pd.DataFrame({'Geo_Meta_data': lines})
print("Initial Metadata DataFrame:")
print(metadata_df)


Processing text: ["\nJanuary 13, 2025 El-Fasher - Darfur 24 Five militants were killed on Monday in a shootout between Military Police and armed persons in Colombia market within Abu-Shouk camp for displaced persons in El-Fasher, the Capital of North Darfur. Abu-Shouk camp market is witnessing a fierce campaign these days by Military Police in coordination with the Intelligence of the Joint Forces of Armed Movements to combat military phenomena, drug and arms trafficking. Eyewitnesses told 'Darfur 24' that 'Military Police and the Intelligence of Joint Forces continued their raid on Colombia market for the third consecutive day as part of their efforts to combat military phenomena, drug trafficking and arms sales in Abu-Shouk camp.' The witnesses reported that the raid was accompanied by gunfire between Military Police and the militants, which resulted in the death of 5 militants and the injury of army soldiers and fighters from the Joint Forces, and dozens of militants were arrested. 

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/1 [00:00<?, ?it/s]

Initial Metadata DataFrame:
                                       Geo_Meta_data
0  Colombia market, Abu-Shouk camp, El-Fasher, No...


In [14]:
# The query engine does not depend on the row, so build it once instead of
# once per iteration.
validation_engine = index.as_query_engine(similarity_top_k=1)

# Iterate through each row to validate and fix the format
for i, row in metadata_df.iterrows():
    validation_query = f"""
    Ensure that the following line adheres to the format:
    1. "neighborhood, state, country | mm/dd/yyyy"
    2. Incident Location and Incident Date is split by "|"
    3. No additional text or explanation.
    
    If the line is already valid, return it as is. If not, fix it.

    Line: {row['Geo_Meta_data']}
    """
    validation_response = validation_engine.query(validation_query)
    validated_line = (validation_response.response or "").strip()
    if validated_line:
        metadata_df.at[i, 'Geo_Meta_data'] = validated_line
    else:
        # An empty/None answer would wipe out a usable extracted line; keep it.
        print(f"Validation returned no text for row {i}; keeping the original line.")

print("Validated Metadata DataFrame:")
print(metadata_df)


Validated Metadata DataFrame:
                                       Geo_Meta_data
0  Colombia market, Abu-Shouk camp, El-Fasher, No...


In [15]:
# Attach the source article to every metadata row (the scalar is broadcast to all rows).
metadata_df = metadata_df.assign(Text=article_text)


# Function to split Geo_Meta_data into Incident Location and Incident Date
def split_geo_meta_data(df):
    """Split 'Geo_Meta_data' into 'Incident Location' and 'Incident Date'.

    Each value is expected to look like "location | date". The text before the
    first '|' becomes the location and everything after it becomes the date;
    both are stripped of surrounding whitespace.

    Unlike a two-column assignment from ``str.split('|', expand=True)``, this
    does not raise when the number of '|' separators is not exactly one:
      * no '|'          -> the whole text is the location, the date is None
      * more than one   -> the split happens on the first '|' only
      * non-string cell -> both location and date are None

    Args:
        df: DataFrame that may contain a 'Geo_Meta_data' column.

    Returns:
        The same DataFrame object. If it had a 'Geo_Meta_data' column, that
        column is removed and the two new columns are added (the frame is
        modified in place); otherwise it is returned untouched.
    """
    if 'Geo_Meta_data' not in df.columns:
        return df

    def _split_line(value):
        # One "location | date" cell -> (location, date); None marks a missing part.
        if not isinstance(value, str):
            return None, None
        location, _, date = value.partition('|')
        return location.strip(), (date.strip() or None)

    pairs = [_split_line(value) for value in df['Geo_Meta_data']]
    df['Incident Location'] = [location for location, _ in pairs]
    df['Incident Date'] = [date for _, date in pairs]
    # Drop the original Geo_Meta_data column
    df.drop(columns=['Geo_Meta_data'], inplace=True)
    return df


# Swap Geo_Meta_data for separate location/date columns.
metadata_df = split_geo_meta_data(metadata_df)

# Persist the expanded frame over the seed CSV and confirm.
metadata_df.to_csv(path_or_buf='automap.csv', encoding='utf-8-sig', index=False)
print("Processed data saved to 'automap.csv'.")


Processed data saved to 'automap.csv'.


In [16]:
""""""""""""""""""""
"""CLASSIFICATION"""
""""""""""""""""""""

''

In [17]:
# Load the CSV to classify; article_column names the column holding the article body.
article_column = 'Text'
input_csv = "automap.csv"
df = pd.read_csv(filepath_or_buffer=input_csv, encoding='utf-8')

In [18]:
# Category definitions and instructions sent to the LLM. The prompt is the same
# for every article, so it is built once here rather than on each iteration.
query = """Your task is to classify each news article into one of the following categories. The output should only contain the category name, with no explanation and other output: 
    
    Unlawful detention - Refers to refers to the act of detaining or confining an individual without legal justification or due process. For example, if protesters are arrested and detained without legal basis during peaceful demonstrations, with no access to legal representation, this would be considered unlawful detention. 

 
    Human trafficking - Refers to the act of recruiting, transporting, transferring, harboring, or receiving individuals through force, fraud, coercion, or other forms of deception for the purpose of exploitation. Exploitation can take many forms, including forced labor, sexual exploitation, slavery, servitude, or the removal of organs. It is considered a severe violation of human rights and is illegal under international and domestic laws. If an incident is considered Human trafficking it would also be considered as a War crime. 


    Enslavement - refers to the act of exercising ownership or control over another person, treating them as property, and depriving them of their freedom. It often involves forcing individuals to perform labor or services under coercion, violence, or the threat of punishment. If an incident is considered Enslavement, it would also be considered as a War crime. 


    Willful killing of civilians - Refers to the intentional killing of civilians who are not directly participating in hostilities, with full knowledge of their noncombatant status. This includes acts like massacres, executions, or deliberate bombings of civilian sites such as homes, schools, or hospitals, where the clear intent is to cause death. For example, a military unit massacring the residents of a village. 
    

    Mass execution - Refers to the deliberate killing of a large scale number of individuals, often carried out by state or non-state actors as part of systematic persecution, acts of war, or punitive measures. The victims are typically selected based on political, ethnic, religious, or social affiliations, and the killings are often premeditated and organized. If an incident is considered Mass execution, it would also be considered as a War crime. 


    Kidnapping - Refers to the unlawful and intentional abduction, confinement, or holding of an individual against their will, often for a specific purpose such as extortion, ransom, political leverage, forced labor, or exploitation. It is a serious crime and violates the individual's right to freedom and security. 
    

    Extrajudicial killing - Refers to the killing of a person without any legal process, such as arrest, trial, or sentencing. It is carried out outside the law, often by state agents or with their approval. 


    Forced disappearance - Refers the act of abducting or detaining a person against their will, followed by a refusal to disclose their fate or whereabouts. This leaves the victim outside the protection of the law and often causes anguish to their family and community. 


    Damage or destruction of civilian critical infrastructure - Refers to the reckless harm, sabotage, or destruction of essential facilities, systems, or services necessary for the well-being, safety, and survival of civilian populations. This includes infrastructure such as hospitals, water supplies, power grids, schools, transportation systems, and communication networks. 


    Damage or destruction, looting, or theft of cultural heritage - Refers to the harm, removal, or appropriation of culturally significant sites, objects, or artifacts during conflicts, disasters, or other destabilizing events. These acts violate international laws that protect cultural heritage as part of humanity's shared history and identity. Furthermore, this also refers to looting incidents. 


    Military operations (battle, shelling) - Refers to actions explicitly conducted between opposing armed forces, such as the RSF and SAF, during a conflict or war. These actions involve the use of weapons, strategies, and tactics to achieve military objectives, focusing on direct engagements or operations targeting enemy positions. Narratives mentioning attacks on civilian areas or indiscriminate shelling are not included in this category, even if long-range weapons or artillery are used. 
    

    Gender-based or other conflict-related sexual violence - Refers to acts of sexual violence committed during or as a result of armed conflict, often targeting individuals based on their gender, identity, or perceived vulnerability. Incidents such as rape or sexual harassment are considered Gender-based or other conflict-related sexual violence. 


    Violent crackdowns on protesters/opponents/civil rights abuse - Refers to the use of excessive or unlawful force suppress dissent, silence opposition. These acts often involve targeting individuals or groups engaging in protests, political opposition, or advocacy for civil rights. 


    Indiscriminate use of weapons - Refers to the use of weapons, such as shelling or bombing, in a manner that impacts buildings, neighborhoods, or areas without clear differentiation between combatants and civilians, or military and civilian infrastructure. This category applies only to incidents involving the use of explosives or long-range weapons that cause widespread harm or destruction, regardless of whether brute force or a massacre is involved, unless explicitly mentioned. 
  

    Torture or indications of torture - Refers to the infliction of severe physical or psychological pain and suffering on a person, typically to punish, intimidate, extract information, or coerce. 
 

    Persecution based on political, racial, ethnic, gender, or sexual orientation - Refers to the systematic mistreatment, harassment, or oppression of individuals or groups due to their political beliefs, race, ethnicity, gender identity, or sexual orientation. 
  

    Movement of military, paramilitary, or other troops and equipment - Refers to the deployment, transfer, or relocation of armed forces, armed groups, or their equipment as part of strategic or operational objectives. This movement may occur during preparation for conflict, active military operations, or in maintaining a presence in certain areas. 

 
    Classify each news article into one of the above categories. Note that a single article can be classified into only one category based on the most prominent theme. If an article fits into multiple categories, select the one that best describes the primary issue."""

df['Classification'] = None
for i, row in df.iterrows():
    # A fresh FAISS index per row, so the index queried below holds only this article.
    faiss_index = faiss.IndexFlatL2(d)
    text = str(row['Text'])
    text_list = [text]
    print(text_list)
    documents = [Document(text=t) for t in text_list]

    vector_store = FaissVectorStore(faiss_index=faiss_index)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context, show_progress=True)

    response = index.as_query_engine(similarity_top_k=1).query(query)

    # Strip the answer: the model may pad the category name with trailing
    # spaces/newlines, which would otherwise be stored in the CSV. A missing
    # (None) answer becomes an empty string instead of breaking the print below.
    article_classification_detected = (response.response or "").strip()

    print(str(i) + " " + article_classification_detected)

    df.at[i, 'Classification'] = article_classification_detected
df.to_csv('automap.csv', index=False, encoding='utf-8')

["\nJanuary 13, 2025 El-Fasher - Darfur 24 Five militants were killed on Monday in a shootout between Military Police and armed persons in Colombia market within Abu-Shouk camp for displaced persons in El-Fasher, the Capital of North Darfur. Abu-Shouk camp market is witnessing a fierce campaign these days by Military Police in coordination with the Intelligence of the Joint Forces of Armed Movements to combat military phenomena, drug and arms trafficking. Eyewitnesses told 'Darfur 24' that 'Military Police and the Intelligence of Joint Forces continued their raid on Colombia market for the third consecutive day as part of their efforts to combat military phenomena, drug trafficking and arms sales in Abu-Shouk camp.' The witnesses reported that the raid was accompanied by gunfire between Military Police and the militants, which resulted in the death of 5 militants and the injury of army soldiers and fighters from the Joint Forces, and dozens of militants were arrested. An intelligence s

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/1 [00:00<?, ?it/s]

0 Military operations (battle, shelling) 



In [19]:
import requests
import os

# Access the Google Maps API key from the environment. Credentials must not be
# embedded in the notebook: any key that was ever committed in this cell should
# be treated as leaked and rotated.
api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
if not api_key:
    raise RuntimeError("Set the GOOGLE_MAPS_API_KEY environment variable before running this cell.")

GEOCODE_URL = 'https://maps.googleapis.com/maps/api/geocode/json'


def geocode_location(location, api_key, timeout=10):
    """Look up one location with the Google Geocoding API.

    Args:
        location: Free-text address. Non-string or blank values (e.g. NaN from
            an empty CSV cell) are skipped without issuing a request.
        api_key: Google Maps API key.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        (latitude, longitude) of the first result, or (None, None) when the
        location is missing, the request fails, or nothing is found.
    """
    if not isinstance(location, str) or not location.strip():
        return None, None
    location = location.strip()  # Remove any leading or trailing whitespace

    try:
        # `params` URL-encodes the address (spaces, commas, '&', non-ASCII).
        response = requests.get(
            GEOCODE_URL,
            params={'address': location, 'key': api_key},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Only the exception type is printed: the message can include the full
        # request URL, which contains the API key.
        print(f"Request failed for location '{location}' ({type(exc).__name__}).")
        return None, None

    # Check for a successful response
    if response.status_code != 200:
        print(f"Request failed for location '{location}' with status code {response.status_code}.")
        return None, None

    data = response.json()
    # The API reports problems such as a denied key or exceeded quota in the
    # JSON "status" field while still answering HTTP 200.
    status = data.get('status')
    if status not in (None, 'OK', 'ZERO_RESULTS'):
        print(f"Geocoding failed for location '{location}' with API status {status}.")

    results = data.get('results') or []
    if not results:
        return None, None
    coordinates = results[0]['geometry']['location']
    return coordinates['lat'], coordinates['lng']


# Load the CSV file
df = pd.read_csv("automap.csv")

# Geocode every value in the "Incident Location" column, one request per row
coordinate_pairs = [geocode_location(location, api_key) for location in df['Incident Location']]
latitudes = [lat for lat, _ in coordinate_pairs]
longitudes = [lng for _, lng in coordinate_pairs]

# Add latitude and longitude columns to the DataFrame
df['Latitude'] = latitudes
df['Longitude'] = longitudes

# Save the DataFrame back to the same CSV file
df.to_csv("automap.csv", index=False)
print("CSV file with location coordinates saved as 'automap.csv'. ")

CSV file with location coordinates saved as 'automap.csv'. 
