### Modeling (LLM)

#### Setup
(Only general setup, model specific imports are done within sections for the models)

In [1]:
# General (modify where necessary)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#### Read data

In [None]:
df = pd.read_csv('../../data/cleaned/Apple_Inc_text_data.csv')

# format datetime again
df['pub_date'] = pd.to_datetime(df['pub_date']) 
df['pub_date'] = df['pub_date'].dt.date

In [None]:
# Copy of df for sentiment analysis, may not actually be necessary, copied from previous nb
sentiment_df=df.copy() #save the df first 
df.head()

NameError: name 'df' is not defined

#### LLM: Gemini
nerfed by api limit

In [None]:
# !pip install -U -q "google-genai"

In [None]:
# Necessary packages
import os
from google import genai
# client = genai.Client(api_key='')

In [19]:
test = sentiment_df[['headline']].tail(10)
test.head()


Unnamed: 0,headline
2114,"The Tech That Needs Fixing in 2024, and What G..."
2115,"The True Price of Apple’s $3,500 Vision Pro Is..."
2116,Why Making Face Computers Cool Isn’t Easy
2117,How to Cut Down Your Screen Time but Still Get...
2118,Apple to Offer Rare Discount on iPhones in China


In [None]:
import google.genai as genai
import pandas as pd
import time
# Initialize the client
# client = genai.Client()

# Function to get sentiment based on model output
def gemini_predict(prompt):
    # Generate content from the model
    response = client.models.generate_content(
        model='gemini-2.0-flash', 
        contents=prompt
    )
    
    # Assuming the model output is a sentiment label (e.g., Positive, Negative)
    # You may need to adjust based on the actual format of the response
    sentiment = response.text.strip()
    
    # Return the sentiment label, default to 'Neutral' if the model is uncertain
    if sentiment not in ['Positive', 'Negative']:
        sentiment = 'Neutral'
    
    return sentiment

# Function to apply sentiment classification using the prompt for headlines
def find_sentiment_zero_shot(text):
    prompt = f"""Evaluate the sentiment conveyed by the headline with respect from an investment perspective. 
    Assign one of the following sentiment labels:
    Positive: For headlines with positive implications.
    Negative: For headlines with negative implications.
    Neutral: For headlines with unclear or neutral implications.
    
    Headline: {text}
    Sentiment: """
    
    # Get sentiment from the model
    sentiment = gemini_predict(prompt)
    return sentiment

def apply_with_delay(df, sentiment_column, delay=4):
    sentiment_list = []
    
    for headline in df[sentiment_column]:
        # Apply sentiment classification
        sentiment = find_sentiment_zero_shot(headline)
        sentiment_list.append(sentiment)
        
        # Delay to respect the RPM limit
        time.sleep(delay)  # Delay in seconds (delay = 4 seconds to stay within 15 requests per minute)
    
    # Add the results to the dataframe
    df['gemini_sentiment'] = sentiment_list
    return df

# Apply the function with delay to the sentiment DataFrame
test_df = apply_with_delay(test, 'headline')

# Display the results
print(test_df[['headline', 'gemini_sentiment']])




                                               headline gemini_sentiment
2114  The Tech That Needs Fixing in 2024, and What G...          Neutral
2115  The True Price of Apple’s $3,500 Vision Pro Is...         Negative
2116          Why Making Face Computers Cool Isn’t Easy         Negative
2117  How to Cut Down Your Screen Time but Still Get...          Neutral
2118   Apple to Offer Rare Discount on iPhones in China          Neutral
2119  Apple Takes a Humble Approach to Launching Its...          Neutral
2120  Apple Overhauls App Store in Europe, in Respon...          Neutral
2121  The Apple Vision Pro Is a Marvel. But Who Will...          Neutral
2122  U.S. Moves Closer to Filing Sweeping Antitrust...         Negative
2123                  Charms Can Personalize Your Watch          Neutral


In [21]:
sentiment_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2124 entries, 0 to 2123
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   pub_date          2124 non-null   object
 1   abstract          2124 non-null   object
 2   lead_para         2124 non-null   object
 3   headline          2124 non-null   object
 4   doc_type          2124 non-null   object
 5   section_name      2124 non-null   object
 6   type_of_material  2124 non-null   object
 7   rank              2124 non-null   int64 
 8   web_url           2124 non-null   object
dtypes: int64(1), object(8)
memory usage: 149.5+ KB


#### LLM: Llama

In [17]:
import asyncio
from pydantic import BaseModel
from ollama import AsyncClient

# Define the schema for sentiment analysis result
class SentimentInfo(BaseModel):
    sentiment: str  # 'positive', 'neutral', or 'negative'
    probability: float  # Probability score of the sentiment

class SentimentResponse(BaseModel):
    headline: str
    sentiment_info: SentimentInfo

async def main():
    client = AsyncClient()

    # Replace with the news headline you want to analyze
    news_headline = "The stock market surged today, hitting record highs due to strong earnings reports."

    # Request sentiment analysis from the model
    response = await client.chat(
        model='llama3.1',
        messages=[{'role': 'user', 'content': f'Analyze the sentiment of this news headline: "{news_headline}" and return it in JSON format.'}],
        format=SentimentResponse.model_json_schema(),  # Use Pydantic to generate the schema for response
        options={'temperature': 0},  # Make responses more deterministic
    )

    # Validate and parse the response
    sentiment_response = SentimentResponse.model_validate_json(response.message.content)

    # Print the sentiment analysis result
    print(f"Headline: {sentiment_response.headline}")
    print(f"Sentiment: {sentiment_response.sentiment_info.sentiment}")
    print(f"Probability: {sentiment_response.sentiment_info.probability}")

# Run the asynchronous function using asyncio in Jupyter
await main()



Headline: The stock market surged today, hitting record highs due to strong earnings reports.
Sentiment: POSITIVE
Probability: 0.9


In [19]:
import asyncio
from pydantic import BaseModel
from ollama import AsyncClient

# Define the schema for sentiment analysis result
class SentimentInfo(BaseModel):
    sentiment: str  # 'positive', 'neutral', or 'negative'
    probability: float  # Probability score of the sentiment

class SentimentResponse(BaseModel):
    headline: str
    sentiment_info: SentimentInfo

async def main():
    client = AsyncClient()

    # Few-shot examples (Replace with real examples)
    few_shot_examples = [
        {"headline": "The stock market surged today, hitting record highs due to strong earnings reports.", "sentiment": "positive", "probability": 0.92},
        {"headline": "The economy faces significant downturns as inflation rates rise.", "sentiment": "negative", "probability": 0.87},
        {"headline": "The new government policy has sparked mixed reactions, with some praising the changes while others remain skeptical.", "sentiment": "neutral", "probability": 0.55},
    ]

    # Create a prompt that includes the few-shot examples
    prompt = "Analyze the sentiment of the following news headlines and return the sentiment label (positive, negative, or neutral) along with the probability score.\n\n"
    
    # Add few-shot examples to the prompt
    for example in few_shot_examples:
        prompt += f"Headline: {example['headline']}\nSentiment: {example['sentiment']}\nProbability: {example['probability']}\n\n"
    
    # Now, ask for the sentiment of the actual news headline
    news_headline = "The stock market surged today, hitting record highs due to strong earnings reports."
    prompt += f"Headline: {news_headline}\nSentiment:"

    # Request sentiment analysis from the model
    response = await client.chat(
        model='llama3.1',  # Replace with the appropriate model name if necessary
        messages=[{'role': 'user', 'content': prompt}],
        format=SentimentResponse.model_json_schema(),  # Use Pydantic to generate the schema for response
        options={'temperature': 0},  # Make responses more deterministic
    )

    # Validate and parse the response
    sentiment_response = SentimentResponse.model_validate_json(response.message.content)

    # Print the sentiment analysis result
    print(f"Headline: {sentiment_response.headline}")
    print(f"Sentiment: {sentiment_response.sentiment_info.sentiment}")
    print(f"Probability: {sentiment_response.sentiment_info.probability}")

# Run the asynchronous function using asyncio in Jupyter
await main()


Headline: The stock market surged today, hitting record highs due to strong earnings reports.
Sentiment: positive
Probability: 0.92


#### LLM: DeepSeek

In [None]:
# TO DO