In [None]:
import ollama
import pandas as pd
import json
from tqdm.auto import tqdm

In [None]:
# Query the local Ollama server for its installed models and print each tag.
available_models = ollama.list()
model_tags = [entry.model for entry in available_models['models']]
for tag in model_tags:
    print(tag)


llama3.1:8b-instruct-q4_K_M
llama3.1:latest


In [None]:
# Smoke test: use the first listed model and confirm it answers in JSON mode.
model_name = available_models['models'][0].model
greeting = {
    'role': 'user',
    'content': 'Hello, respond in json format: {"response": "Hello, world!"}',
}
response = ollama.chat(model=model_name, messages=[greeting], format='json')
print(response['message']['content'])


{"response": "Hello, I'm ready to chat. How can I assist you today?"}


In [None]:
# Load the combined news articles that the annotation loop below iterates over.
df = pd.read_csv(filepath_or_buffer='data/news/all_articles_combined.csv')

In [None]:
def _ask_json(prompt):
    """Send one user prompt to the model in JSON mode and return the parsed object.

    Returns an empty dict when the reply is blank, is not valid JSON, lacks the
    message/content fields, or decodes to something other than a JSON object,
    so callers can always call ``.get`` on the result.
    """
    reply = ollama.chat(
        model=model_name,
        messages=[{'role': 'user', 'content': prompt}],
        format='json'
    )
    try:
        content = reply['message']['content'].strip()
        parsed = json.loads(content) if content else {}
    except (json.JSONDecodeError, KeyError, TypeError, AttributeError):
        # AttributeError covers a content value that is None (no .strip()).
        return {}
    # JSON mode can still yield a list/string/number; only objects have keys.
    return parsed if isinstance(parsed, dict) else {}


def _article_text(row):
    """Join the row's title, description and keywords, skipping missing values.

    Missing CSV cells are NaN; formatting them directly would put the literal
    word "nan" into the prompt.
    """
    parts = (row['title'], row['description'], row['keywords'])
    return ' '.join(str(part) for part in parts if pd.notna(part))


def _is_affirmative(value):
    """Return True only for boolean ``True`` or the string "true" (any case).

    Plain truthiness would treat the string "false" as relevant.
    """
    if isinstance(value, str):
        return value.strip().lower() == 'true'
    return value is True


# One entry per row of df, in row order: the extracted location, or None when
# the article is judged irrelevant or no location could be parsed.
locations = []

for _, row in tqdm(df.iterrows(), total=len(df)):
    text = _article_text(row)

    # Nothing to classify: skip both model calls.
    if not text:
        locations.append(None)
        continue

    # Step 1: relevance filter, so the location prompt only runs on crime articles.
    relevance_data = _ask_json(
        f'Is this text about crime in a US location? Respond in JSON format: {{"relevance": true}} or {{"relevance": false}}\n\nText: {text}'
    )
    if not _is_affirmative(relevance_data.get('relevance')):
        locations.append(None)
        continue

    # Step 2: location extraction for relevant articles.
    location_data = _ask_json(
        f'Identify the exact US location mentioned in this text. Respond in JSON format: {{"location": "location name"}}\n\nText: {text}'
    )
    locations.append(location_data.get('location'))

df['location'] = locations


In [None]:
# Persist the annotated frame (including the new 'location' column) without the index.
df.to_csv(
    path_or_buf='data/news/all_articles_combined_machine_annotation.csv',
    index=False,
)
