In [7]:
import pandas as pd 

fox2018 = pd.read_csv("../../data/fox/fox2018.csv")

In [14]:
import pandas as pd
import os
import numpy as np
from datetime import datetime

# Define the base directory where your data is stored
data_dir = "../../data"

# Define the sources and their respective directories
sources = {
    "fox": os.path.join(data_dir, "fox"),
    "abc": os.path.join(data_dir, "abc"),
    "msnbc": os.path.join(data_dir, "msnbc")
}

# Function to extract the first value from V2Tone string
def extract_tone_score(tone_str):
    if pd.isna(tone_str):
        return np.nan
    
    try:
        # Split by comma and take the first value
        return float(str(tone_str).split(',')[0])
    except (ValueError, IndexError):
        return np.nan

# Initialize an empty DataFrame to store results
results = []

# Process each source
for source_name, source_dir in sources.items():
    print(f"Processing source: {source_name}")
    
    # Get all CSV files in the source directory
    csv_files = [f for f in os.listdir(source_dir) if f.endswith('.csv')]
    
    for csv_file in csv_files:
        # Extract year from filename
        year = int(csv_file.replace(f"{source_name}", "").replace(".csv", ""))
        file_path = os.path.join(source_dir, csv_file)
        
        print(f"  Processing {csv_file} (Year: {year})")
        
        try:
            # Read the CSV file
            df = pd.read_csv(file_path)
            
            # Check if required columns exist
            if 'parsed_date' not in df.columns or 'V2Tone' not in df.columns:
                print(f"    Error: Missing required columns in {csv_file}")
                continue
                
            # Convert parsed_date to datetime and extract month
            df['datetime'] = pd.to_datetime(df['parsed_date'])
            df['month'] = df['datetime'].dt.month
            
            # Extract the first tone score from V2Tone
            df['tone_score'] = df['V2Tone'].apply(extract_tone_score)
            
            # Calculate monthly averages
            monthly_avg = df.groupby('month')['tone_score'].mean().reset_index()
            
            # Add source and year columns
            monthly_avg['source'] = source_name
            monthly_avg['year'] = year
            
            # Append to results
            results.append(monthly_avg)
            
            print(f"    Processed {len(df)} articles, found {monthly_avg.shape[0]} months with data")
            
        except Exception as e:
            print(f"    Error processing {csv_file}: {str(e)}")
    
# Combine all results
if results:
    all_results = pd.concat(results, ignore_index=True)
    
    # Rename columns to match requested output
    all_results = all_results.rename(columns={'tone_score': 'average_tone'})
    
    # Reorder columns
    all_results = all_results[['source', 'year', 'month', 'average_tone']]
    
    # Sort by source, year, and month
    all_results = all_results.sort_values(['source', 'year', 'month'])
    
    # Save to CSV
    output_file = "../data/gdelt_monthly_tone_averages.csv"
    all_results.to_csv(output_file, index=False)
    
    print(f"\nAnalysis complete. Results saved to {output_file}")
    print(f"Total records: {len(all_results)}")
    
    # Display sample of results
    print("\nSample of results:")
    print(all_results.head(10))
    
    # Calculate overall source averages for comparison
    source_avg = all_results.groupby('source')['average_tone'].mean().reset_index()
    print("\nOverall average tone by source:")
    print(source_avg)
else:
    print("No results generated. Please check the data files and paths.")

Processing source: fox
  Processing fox2016.csv (Year: 2016)
    Processed 12000 articles, found 12 months with data
  Processing fox2017.csv (Year: 2017)
    Processed 12000 articles, found 12 months with data
  Processing fox2015.csv (Year: 2015)
    Processed 11000 articles, found 11 months with data
  Processing fox2023.csv (Year: 2023)
    Processed 12000 articles, found 12 months with data
  Processing fox2022.csv (Year: 2022)
    Processed 12000 articles, found 12 months with data
  Processing fox2020.csv (Year: 2020)
    Processed 12000 articles, found 12 months with data
  Processing fox2021.csv (Year: 2021)
    Processed 12000 articles, found 12 months with data
  Processing fox2025.csv (Year: 2025)
    Processed 3079 articles, found 4 months with data
  Processing fox2019.csv (Year: 2019)
    Processed 12000 articles, found 12 months with data
  Processing fox2018.csv (Year: 2018)
    Processed 12000 articles, found 12 months with data
  Processing fox2024.csv (Year: 2024)
 