# Alpha Vantage Sentiment Scores
## Sentiment Score Definition:
For a sentiment score x, the label is assigned as follows: \
x <= -0.35: Bearish \
-0.35 < x <= -0.15: Somewhat-Bearish \
-0.15 < x < 0.15: Neutral \
0.15 <= x < 0.35: Somewhat_Bullish \
x >= 0.35: Bullish 

## Relevance Score Definition: 
0 < x <= 1, with a higher score indicating higher relevance.

In [4]:
# Importing Packages
import json
import pandas as pd

In [5]:
def process_json_to_df(json_data):
    """Flatten an Alpha Vantage news-sentiment payload into a DataFrame.

    Parameters
    ----------
    json_data : dict
        Parsed JSON containing a ``'feed'`` list. Each feed item must provide
        the keys ``title``, ``url``, ``time_published``, ``summary``,
        ``source``, ``topics``, ``overall_sentiment_score``,
        ``overall_sentiment_label`` and ``ticker_sentiment``.

    Returns
    -------
    pandas.DataFrame
        One row per feed item. ``topics`` holds a ``{topic: relevance_score}``
        dict and ``ticker_sentiment`` holds a
        ``{ticker: {relevance_score, sentiment_score, sentiment_label}}`` dict.
        The column set is the same even when the feed is empty.

    Raises
    ------
    KeyError
        If ``'feed'`` or any of the per-item keys listed above is missing.
    """
    # Fixed schema: passing it to the constructor keeps the columns present
    # (and in this order) even when the feed has no articles, so callers can
    # always select columns by name.
    columns = [
        'title',
        'url',
        'time_published',
        'summary',
        'source',
        'topics',
        'overall_sentiment_score',
        'overall_sentiment_label',
        'ticker_sentiment',
    ]

    feed = json_data['feed']

    processed_records = [
        {
            'title': item['title'],
            'url': item['url'],
            'time_published': item['time_published'],
            'summary': item['summary'],
            'source': item['source'],
            # Collapse the list of topic entries into {topic name: relevance score}
            'topics': {
                topic['topic']: topic['relevance_score']
                for topic in item['topics']
            },
            'overall_sentiment_score': item['overall_sentiment_score'],
            'overall_sentiment_label': item['overall_sentiment_label'],
            # Collapse the per-ticker entries into a dict keyed by ticker symbol
            'ticker_sentiment': {
                ticker['ticker']: {
                    'relevance_score': ticker['relevance_score'],
                    'sentiment_score': ticker['ticker_sentiment_score'],
                    'sentiment_label': ticker['ticker_sentiment_label'],
                }
                for ticker in item['ticker_sentiment']
            },
        }
        for item in feed
    ]

    return pd.DataFrame(processed_records, columns=columns)

In [None]:
def combine_sentiment_files(tickers, data_dir="."):
    """Load per-ticker sentiment JSON files and stack them into one frame.

    For every ticker the file ``news_sentiment_<ticker>.json`` is read from
    ``data_dir`` and flattened with ``process_json_to_df``.

    Parameters
    ----------
    tickers : iterable of str
        Ticker symbols; each one selects the file to read and is written
        to the ``ticker`` column of the rows loaded from that file.
    data_dir : str, optional
        Directory containing the JSON files. Defaults to the current working
        directory, which matches the previous hard-coded behaviour.

    Returns
    -------
    pandas.DataFrame
        Columns ``ticker``, ``time_published`` (datetime) and
        ``overall_sentiment_score``, sorted by ``time_published``. The index
        is a copy of the ``time_published`` column. An empty frame with these
        columns is returned when no articles are found.

    Raises
    ------
    FileNotFoundError
        If a ticker's JSON file does not exist in ``data_dir``.
    ValueError
        If a ``time_published`` value does not match ``%Y%m%dT%H%M%S``.
    """
    import os  # local import so this cell does not depend on an edited import cell

    columns = ['ticker', 'time_published', 'overall_sentiment_score']
    frames = []

    for ticker in tickers:
        # Build the filename from the ticker symbol
        path = os.path.join(data_dir, f"news_sentiment_{ticker}.json")

        with open(path, 'r', encoding='utf-8') as f:
            json_data = json.load(f)

        ticker_df = process_json_to_df(json_data)

        # A file with no articles contributes nothing; skipping it also avoids
        # selecting columns from a frame that may not have them.
        if ticker_df.empty:
            continue

        # Tag rows with their ticker and keep only the columns needed downstream
        frames.append(ticker_df.assign(ticker=ticker)[columns])

    if frames:
        final_df = pd.concat(frames, ignore_index=True)
    else:
        # pd.concat raises on an empty list; return a correctly-shaped empty frame
        final_df = pd.DataFrame(columns=columns)

    # Timestamps look like "20241231T101500"
    final_df['time_published'] = pd.to_datetime(final_df['time_published'], format='%Y%m%dT%H%M%S')

    # Stable sort so rows sharing a timestamp keep a reproducible order
    final_df = final_df.sort_values('time_published', kind='mergesort')

    # NOTE(review): the index and the column now share the name
    # 'time_published'; pandas treats by-name lookups such as
    # sort_values('time_published') as ambiguous on such a frame.
    final_df.index = final_df['time_published']

    return final_df

In [22]:
# Ticker symbols to load; combine_sentiment_files reads one
# news_sentiment_<TICKER>.json file per entry from the working directory.
tickers = ["AEM", "FNV", "GFI", "GOLD", "KGC", "NEM", "WPM"]
final_df = combine_sentiment_files(tickers)
# NOTE(review): the saved output below shows an integer index, while the
# current combine_sentiment_files indexes by time_published and its cell is
# unexecuted — restart the kernel and run all cells to refresh the output.
final_df

Unnamed: 0,ticker,time_published,overall_sentiment_score
1150,FNV,2022-03-01 08:00:00,-0.156506
1149,FNV,2022-03-07 13:48:00,0.144180
1748,GFI,2022-03-08 15:36:28,0.066281
1148,FNV,2022-03-09 08:49:30,-0.016950
4403,WPM,2022-03-09 13:00:00,0.010965
...,...,...,...
3193,NEM,2024-12-28 14:12:00,0.498358
2,AEM,2024-12-30 12:05:24,0.196444
1,AEM,2024-12-30 21:40:00,0.269193
3192,NEM,2024-12-31 10:15:00,0.260052
