In [49]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import openai
import time
from openai import OpenAI

In [50]:
models = client.models.list()
for model in models.data:
    print(model.id)

gpt-4-0613
gpt-4
gpt-3.5-turbo
gpt-4o-audio-preview-2025-06-03
gpt-4.1-nano-2025-04-14
gpt-4.1-nano
gpt-image-1
gpt-4o-realtime-preview-2025-06-03
davinci-002
babbage-002
gpt-3.5-turbo-instruct
gpt-3.5-turbo-instruct-0914
dall-e-3
dall-e-2
gpt-4-1106-preview
gpt-3.5-turbo-1106
tts-1-hd
tts-1-1106
tts-1-hd-1106
text-embedding-3-small
text-embedding-3-large
gpt-4-0125-preview
gpt-4-turbo-preview
gpt-3.5-turbo-0125
gpt-4-turbo
gpt-4-turbo-2024-04-09
gpt-4o
gpt-4o-2024-05-13
gpt-4o-mini-2024-07-18
gpt-4o-mini
gpt-4o-2024-08-06
chatgpt-4o-latest
o1-mini-2024-09-12
o1-mini
gpt-4o-realtime-preview-2024-10-01
gpt-4o-audio-preview-2024-10-01
gpt-4o-audio-preview
gpt-4o-realtime-preview
omni-moderation-latest
omni-moderation-2024-09-26
gpt-4o-realtime-preview-2024-12-17
gpt-4o-audio-preview-2024-12-17
gpt-4o-mini-realtime-preview-2024-12-17
gpt-4o-mini-audio-preview-2024-12-17
o1-2024-12-17
o1
gpt-4o-mini-realtime-preview
gpt-4o-mini-audio-preview
o3-mini
o3-mini-2025-01-31
gpt-4o-2024-11-20
gpt

In [51]:
#news api limits it to 100 for the free plan
load_dotenv()

NEWS_API_KEY = os.getenv("NEWS_API_KEY")

def fetch_headlines(query="AI", page_size=100):
    url = (
        f"https://newsapi.org/v2/everything?"
        f"q={query}&language=en&pageSize={page_size}&apiKey={NEWS_API_KEY}"
    )
    response = requests.get(url)
    data = response.json()

    headlines = [
        {"title": article["title"], "source": article["source"]["name"]}
        for article in data.get("articles", [])
    ]
    return pd.DataFrame(headlines)

# headlines
df = fetch_headlines("AI", page_size=100)
print(f"Total headlines retrieved: {len(df)}")
print(df.head())

Total headlines retrieved: 100
                                               title     source
0                Join Us for WIRED’s AI Power Summit      Wired
1                        Where Are All the AI Drugs?      Wired
2         Confessions of a Recovering AI Porn Addict      Wired
3  Android’s Circle to Search feature gets AI and...  The Verge
4  Amazon buys Bee AI wearable that listens to ev...  The Verge


In [52]:
openai.api_key = os.getenv("OPENAI_API_KEY")

client = OpenAI()  # uses your OPENAI_API_KEY from .env

def extract_features_from_headline(headline):
    prompt = f"""
Analyze the following news headline and extract the following features:

Headline: "{headline}"

1. Sentiment (Positive, Negative, Neutral)
2. Main Topic (e.g., Politics, Finance, Technology, etc.)
3. Named Entities (People, Companies, Organizations)
4. Tone (e.g., Informative, Alarmist, Speculative, Promotional, Neutral)
5. Region/Location (e.g., US, Europe, China, Global, Unknown)
6. Intent of the headline (e.g., Report news, Persuade, Warn, Promote)
7. Time relevance (Past, Present, Future)
8. Emotion(s) evoked (e.g., Fear, Hope, Anger, Curiosity)
9. Is this headline clickbait? (Yes/No)

Respond in this format:
Sentiment: ...
Topic: ...
Entities: ...
Tone: ...
Region: ...
Intent: ...
Time: ...
Emotions: ...
Clickbait: ...
"""

    try:
        response = client.chat.completions.create(
            model="gpt-4o",  # ✅ Use gpt-4o here
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error: {e}")
        return None

In [53]:
def process_headlines(df):
    results = []

    for i, row in df.iterrows():
        print(f"Processing {i+1}/{len(df)}: {row['title']}")
        result = extract_features_from_headline(row['title'])
        time.sleep(1.5)

        if result:
            extracted = {
                "headline": row["title"],
                "source": row["source"],
                "sentiment": "N/A",
                "topic": "N/A",
                "entities": "N/A",
                "tone": "N/A",
                "region": "N/A",
                "intent": "N/A",
                "time": "N/A",
                "emotions": "N/A",
                "clickbait": "N/A"
            }

            for line in result.splitlines():
                for key in extracted.keys():
                    if line.lower().startswith(f"{key}:".lower()):
                        extracted[key] = line.split(":", 1)[1].strip()

            results.append(extracted)

    return pd.DataFrame(results)

In [54]:
labeled_df = process_headlines(df)
labeled_df.to_csv("labeled_headlines.csv", index=False)
print("Saved labeled_headlines.csv")

Processing 1/100: Join Us for WIRED’s AI Power Summit
Processing 2/100: Where Are All the AI Drugs?
Processing 3/100: Confessions of a Recovering AI Porn Addict
Processing 4/100: Android’s Circle to Search feature gets AI and gaming upgrades
Processing 5/100: Amazon buys Bee AI wearable that listens to everything you say
Processing 6/100: Meta snubs the EU’s voluntary AI guidelines
Processing 7/100: Google AI Mode will generate fake clothes to help you buy real ones
Processing 8/100: Breaking down Trump’s big gift to the AI industry
Processing 9/100: Perplexity just launched an AI web browser
Processing 10/100: The creepy AI era is here
Processing 11/100: People Are Using AI Chatbots to Guide Their Psychedelic Trips
Processing 12/100: Bryan Johnson Is Going to Die
Processing 13/100: Elon Musk teases AI anime boyfriend based on Edward Cullen
Processing 14/100: Proton is launching a privacy-focused AI chatbot
Processing 15/100: Figma’s AI app building tool is now available for everyone
P

In [55]:
import pandas as pd

# load data
labeled_df = pd.read_csv("labeled_headlines.csv")

In [56]:
# general overview
# the first 5 rows
print(labeled_df.head())

# columns
print("\nColumns:", labeled_df.columns.tolist())

#number of entries
print("\nTotal headlines:", len(labeled_df))

                                            headline     source sentiment  \
0                Join Us for WIRED’s AI Power Summit      Wired  Positive   
1                        Where Are All the AI Drugs?      Wired   Neutral   
2         Confessions of a Recovering AI Porn Addict      Wired  Negative   
3  Android’s Circle to Search feature gets AI and...  The Verge  Positive   
4  Amazon buys Bee AI wearable that listens to ev...  The Verge   Neutral   

        topic         entities         tone   region       intent     time  \
0  Technology            WIRED  Promotional  Unknown      Promote   Future   
1  Technology  None identified  Speculative  Unknown  Report news  Present   
2  Technology              NaN  Informative  Unknown  Report news  Present   
3  Technology          Android  Informative   Global  Report news  Present   
4  Technology   Amazon, Bee AI  Informative   Global  Report news  Present   

    emotions clickbait  
0  Curiosity        No  
1  Curiosity      

In [57]:
# summary of unique values in each category
for col in ["sentiment", "topic", "tone", "region", "intent", "time", "clickbait"]:
    print(f"\n{col.title()} value counts:")
    print(labeled_df[col].value_counts())


Sentiment value counts:
sentiment
Neutral                          45
Negative                         28
Positive                         26
Mixed (Positive and Negative)     1
Name: count, dtype: int64

Topic value counts:
topic
Technology                                                          87
Politics, Technology                                                 5
Technology, Politics                                                 3
Unknown (insufficient information to determine a specific topic)     1
Energy/Technology                                                    1
Entertainment/Technology                                             1
Finance/Technology                                                   1
Finance, Technology                                                  1
Name: count, dtype: int64

Tone value counts:
tone
Informative                 52
Speculative                 21
Alarmist                    15
Promotional                  8
Informative, Speculative 