In [4]:
pip install requests pandas nltk


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import os
import time
from datetime import datetime, timedelta

import nltk
import pandas as pd
import requests
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Setup
# Fetch the VADER lexicon required by SentimentIntensityAnalyzer, then build
# the analyzer used to score every article.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

# The GNews API key is read from the environment so the secret is never stored
# in the notebook. A key that was previously committed in this file should be
# treated as leaked and revoked.
API_KEY = os.environ.get('GNEWS_API_KEY')
if not API_KEY:
    raise RuntimeError(
        "GNEWS_API_KEY is not set. Export your GNews API key as the "
        "GNEWS_API_KEY environment variable before running this notebook."
    )
BASE_URL = 'https://gnews.io/api/v4/search'

# Markets to query: two-letter country code -> display name
# (Malaysia itself plus its key trading partners).
countries = dict([
    ('us', 'United States'),
    ('de', 'Germany'),
    ('cn', 'China'),
    ('my', 'Malaysia'),
    ('vn', 'Vietnam'),
    ('kr', 'South Korea'),
    ('mx', 'Mexico'),
])

# One entry per month start, covering January 2020 through December 2023.
date_list = pd.date_range('2020-01-01', '2023-12-31', freq='MS')

# Search expression: articles must mention both tariffs and electronics.
keywords = 'tariff AND electronic'

# Collect data
# For every country and every month in `date_list`, query the search endpoint,
# score each returned article with VADER and append one record to `all_data`.
# NOTE(review): only a single page of results is requested per country/month;
# months with more matches than `max` are truncated -- confirm against the
# limits of the API plan in use.
REQUEST_TIMEOUT = 30  # seconds; keeps one stalled request from hanging the run
all_data = []

for code, name in countries.items():
    print(f"Collecting for {name}")
    for date in date_list:
        # Date-only label used in log messages.
        month_label = date.strftime('%Y-%m-%d')

        # Query exactly one calendar month. A fixed 30-day offset from the
        # month start spills into the following month for 28/29/30-day months,
        # so the same articles would be fetched twice by adjacent windows.
        month_end = date + pd.offsets.MonthBegin(1) - timedelta(seconds=1)

        # The API documents `from`/`to` as full ISO 8601 timestamps.
        from_date = date.strftime('%Y-%m-%dT%H:%M:%SZ')
        to_date = month_end.strftime('%Y-%m-%dT%H:%M:%SZ')

        params = {
            'q': keywords,
            'lang': 'en',
            'country': code,
            'from': from_date,
            'to': to_date,
            'max': 100,
            'apikey': API_KEY
        }

        try:
            response = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Report only the exception type: the full message embeds the
            # request URL, which contains the API key.
            print(f"Error: {type(exc).__name__} for {name} on {month_label}")
            response = None

        if response is not None and response.status_code == 200:
            try:
                data = response.json()
            except ValueError:
                # A 200 response whose body is not valid JSON is skipped.
                print(f"Error: invalid JSON for {name} on {month_label}")
                data = {}
            for article in data.get('articles', []):
                title = article.get('title')
                description = article.get('description')
                # Missing title/description count as empty text for scoring.
                text = (title or '') + '. ' + (description or '')
                sentiment_score = sia.polarity_scores(text)['compound']
                # Fields are read with .get so an article lacking one of them
                # is stored with None instead of aborting the run.
                all_data.append({
                    'country_code': code,
                    'country_name': name,
                    'publishedAt': article.get('publishedAt'),
                    'title': title,
                    'description': description,
                    'content': article.get('content'),
                    'url': article.get('url'),
                    'sentiment': sentiment_score
                })
        elif response is not None:
            print(f"Error: {response.status_code} for {name} on {month_label}")

        time.sleep(1)  # Respect API rate limits


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\razin\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Collecting for United States
Collecting for Germany
Collecting for China
Collecting for Malaysia
Collecting for Vietnam
Collecting for South Korea
Collecting for Mexico


In [5]:
# Convert to DataFrame
# Columns are listed explicitly so the frame has the expected schema even when
# no articles were collected; otherwise df['publishedAt'] raises KeyError.
article_columns = [
    'country_code', 'country_name', 'publishedAt', 'title',
    'description', 'content', 'url', 'sentiment',
]
df = pd.DataFrame(all_data, columns=article_columns)
if df.empty:
    print("Warning: no articles were collected; the output files will contain headers only.")

# Parse timestamps as UTC so the year is derived consistently for every row.
df['publishedAt'] = pd.to_datetime(df['publishedAt'], utc=True)
df['year'] = df['publishedAt'].dt.year
# Force a numeric dtype so the mean below is also defined for an empty frame.
df['sentiment'] = df['sentiment'].astype(float)

# Aggregate sentiment by country and year
annual_sentiment = (
    df.groupby(['country_name', 'year'])['sentiment']
    .mean()
    .reset_index()
)

# Save results
df.to_csv('sentiment_tariff_electronics_detailed.csv', index=False)
annual_sentiment.to_csv('sentiment_tariff_electronics_annual.csv', index=False)
