In [1]:
import pandas as pd
import json
from transformers import pipeline
import numpy as np

In [2]:
with open('calendar.json','r') as f:
      data = json.load(f)

In [3]:
data

[{'date': 'Sun',
  'time': '11:30pm',
  'currency': 'USD',
  'impact': 'Low',
  'event': 'FOMC Member Barkin Speaks',
  'actual': None,
  'forecast': None,
  'previous': None},
 {'date': 'Mon',
  'time': '5:20am',
  'currency': 'JPY',
  'impact': 'Low',
  'event': 'Bank Lending y/y',
  'actual': '2.4%',
  'forecast': '2.8%',
  'previous': '2.8%'},
 {'date': 'Mon',
  'time': None,
  'currency': 'JPY',
  'impact': 'Low',
  'event': 'Current Account',
  'actual': '2.72T',
  'forecast': '2.42T',
  'previous': '2.91T'},
 {'date': 'Mon',
  'time': '10:30am',
  'currency': 'JPY',
  'impact': 'Low',
  'event': 'Economy Watchers Sentiment',
  'actual': '42.6',
  'forecast': '44.7',
  'previous': '45.1'},
 {'date': 'Mon',
  'time': '1:30pm',
  'currency': 'GBP',
  'impact': 'Low',
  'event': 'MPC Member Lombardelli Speaks',
  'actual': None,
  'forecast': None,
  'previous': None},
 {'date': 'Mon',
  'time': 'All Day',
  'currency': 'EUR',
  'impact': 'Low',
  'event': 'Eurogroup Meetings',
  'a

In [4]:
df = pd.DataFrame(data)

In [5]:
def clean_percent(val):
  try:
    return float(val.strip('%'))
  except:
    return None

In [6]:
df['actual_val'] = df['actual'].apply(clean_percent)
df['forecast_val'] = df['forecast'].apply(clean_percent)
df['previous_val'] = df['previous'].apply(clean_percent)

In [7]:
df

Unnamed: 0,date,time,currency,impact,event,actual,forecast,previous,actual_val,forecast_val,previous_val
0,Sun,11:30pm,USD,Low,FOMC Member Barkin Speaks,,,,,,
1,Mon,5:20am,JPY,Low,Bank Lending y/y,2.4%,2.8%,2.8%,2.4,2.8,2.8
2,Mon,,JPY,Low,Current Account,2.72T,2.42T,2.91T,,,
3,Mon,10:30am,JPY,Low,Economy Watchers Sentiment,42.6,44.7,45.1,42.6,44.7,45.1
4,Mon,1:30pm,GBP,Low,MPC Member Lombardelli Speaks,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
104,Fri,7:30pm,USD,High,Prelim UoM Consumer Sentiment,,53.1,52.2,,53.1,52.2
105,Fri,,USD,High,Prelim UoM Inflation Expectations,,,6.5%,,,6.5
106,Fri,8:30pm,GBP,Low,MPC Member Lombardelli Speaks,,,,,,
107,Sat,1:30am,USD,Low,TIC Long-Term Purchases,,44.2B,112.0B,,,


In [8]:
for col in ['actual_val','forecast_val','previous_val']:
  mean_val = df[col].mean()
  df[col].fillna(mean_val,inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(mean_val,inplace=True)


In [9]:
df

Unnamed: 0,date,time,currency,impact,event,actual,forecast,previous,actual_val,forecast_val,previous_val
0,Sun,11:30pm,USD,Low,FOMC Member Barkin Speaks,,,,12.64375,7.064583,5.961311
1,Mon,5:20am,JPY,Low,Bank Lending y/y,2.4%,2.8%,2.8%,2.40000,2.800000,2.800000
2,Mon,,JPY,Low,Current Account,2.72T,2.42T,2.91T,12.64375,7.064583,5.961311
3,Mon,10:30am,JPY,Low,Economy Watchers Sentiment,42.6,44.7,45.1,42.60000,44.700000,45.100000
4,Mon,1:30pm,GBP,Low,MPC Member Lombardelli Speaks,,,,12.64375,7.064583,5.961311
...,...,...,...,...,...,...,...,...,...,...,...
104,Fri,7:30pm,USD,High,Prelim UoM Consumer Sentiment,,53.1,52.2,12.64375,53.100000,52.200000
105,Fri,,USD,High,Prelim UoM Inflation Expectations,,,6.5%,12.64375,7.064583,6.500000
106,Fri,8:30pm,GBP,Low,MPC Member Lombardelli Speaks,,,,12.64375,7.064583,5.961311
107,Sat,1:30am,USD,Low,TIC Long-Term Purchases,,44.2B,112.0B,12.64375,7.064583,5.961311


In [10]:
def infer_impact(row):
  if pd.notnull(row['impact']):
    return row['impact']

  forecast = row['forecast_val']
  actual = row['actual_val']
  if forecast == 0:
    return 'Low'

  deviation = abs(actual - forecast) / forecast
  if deviation >0.05:
    return 'High'
  elif deviation > 0.02:
    return 'Medium'
  else:
    return 'Low'


df['impact']=df.apply(infer_impact, axis = 1)

In [11]:
df

Unnamed: 0,date,time,currency,impact,event,actual,forecast,previous,actual_val,forecast_val,previous_val
0,Sun,11:30pm,USD,Low,FOMC Member Barkin Speaks,,,,12.64375,7.064583,5.961311
1,Mon,5:20am,JPY,Low,Bank Lending y/y,2.4%,2.8%,2.8%,2.40000,2.800000,2.800000
2,Mon,,JPY,Low,Current Account,2.72T,2.42T,2.91T,12.64375,7.064583,5.961311
3,Mon,10:30am,JPY,Low,Economy Watchers Sentiment,42.6,44.7,45.1,42.60000,44.700000,45.100000
4,Mon,1:30pm,GBP,Low,MPC Member Lombardelli Speaks,,,,12.64375,7.064583,5.961311
...,...,...,...,...,...,...,...,...,...,...,...
104,Fri,7:30pm,USD,High,Prelim UoM Consumer Sentiment,,53.1,52.2,12.64375,53.100000,52.200000
105,Fri,,USD,High,Prelim UoM Inflation Expectations,,,6.5%,12.64375,7.064583,6.500000
106,Fri,8:30pm,GBP,Low,MPC Member Lombardelli Speaks,,,,12.64375,7.064583,5.961311
107,Sat,1:30am,USD,Low,TIC Long-Term Purchases,,44.2B,112.0B,12.64375,7.064583,5.961311


In [12]:
def rule_sentiment(row):
    event = row['event'].lower()
    actual = row['actual_val']
    forecast = row['forecast_val']

    # CPI: Rising inflation = Bearish
    if "cpi" in event:
        if actual > forecast:
            return 'bearish'
        elif actual < forecast:
            return 'bullish'
        else:
            return 'neutral'

    # NFP: Strong employment = Bullish
    elif "non-farm" in event or "nfp" in event:
        if actual > forecast:
            return 'bullish'
        elif actual < forecast:
            return 'bearish'
        else:
            return 'neutral'

    # FOMC / Rate events
    elif "fomc" in event or "interest rate" in event or "rate statement" in event:
        if "rate hike" in event:
            return 'bearish'
        elif "rate cut" in event:
            return 'bullish'
        else:
            return 'neutral'

    # Default for unknown event types
    else:
        return 'neutral'

df['rule_sentiment'] = df.apply(rule_sentiment, axis=1)

In [13]:
def build_input(row):
    return (
        f"EVENT: {row['event']} | "
        f"IMPACT (important): {row['impact']} | "
        f"FORECAST VALUE: {row['forecast_val']} | "
        f"PREVIOUS VALUE: {row['previous_val']} | "
        f"ACTUAL RESULT: {row['actual_val']}"
    )

In [14]:
df['finber_input'] = df.apply(build_input,axis=1)

In [15]:
df['finber_input']

Unnamed: 0,finber_input
0,EVENT: FOMC Member Barkin Speaks | IMPACT (imp...
1,EVENT: Bank Lending y/y | IMPACT (important): ...
2,EVENT: Current Account | IMPACT (important): L...
3,EVENT: Economy Watchers Sentiment | IMPACT (im...
4,EVENT: MPC Member Lombardelli Speaks | IMPACT ...
...,...
104,EVENT: Prelim UoM Consumer Sentiment | IMPACT ...
105,EVENT: Prelim UoM Inflation Expectations | IMP...
106,EVENT: MPC Member Lombardelli Speaks | IMPACT ...
107,EVENT: TIC Long-Term Purchases | IMPACT (impor...


In [16]:
classifier = pipeline("sentiment-analysis", model="ProsusAI/finbert")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cpu


In [17]:
df['nlp_sentiment'] = df['finber_input'].apply(lambda x: classifier(x)[0]['label'])
df['nlp_confidence'] = df['finber_input'].apply(lambda x: classifier(x)[0]['score'])


In [18]:
def combine_sentiment(row):
    rule = row['rule_sentiment']
    nlp = row['nlp_sentiment'].lower()
    impact = row['impact'].lower()

    impact_weight = {'low': 1, 'medium': 2, 'high': 3}
    weight = impact_weight.get(impact, 1)

    if rule == nlp:
        return rule
    if weight >= 2:
        return rule
    return {
        'positive': 'bullish',
        'negative': 'bearish',
        'neutral': 'neutral'
    }.get(nlp, 'neutral')

df['market_sentiment'] = df.apply(combine_sentiment, axis=1)

In [19]:
print(df[['event', 'actual', 'forecast', 'impact', 'rule_sentiment', 'nlp_sentiment', 'market_sentiment']])

                                 event actual forecast impact rule_sentiment  \
0            FOMC Member Barkin Speaks   None     None    Low        neutral   
1                     Bank Lending y/y   2.4%     2.8%    Low        neutral   
2                      Current Account  2.72T    2.42T    Low        neutral   
3           Economy Watchers Sentiment   42.6     44.7    Low        neutral   
4        MPC Member Lombardelli Speaks   None     None    Low        neutral   
..                                 ...    ...      ...    ...            ...   
104      Prelim UoM Consumer Sentiment   None     53.1   High        neutral   
105  Prelim UoM Inflation Expectations   None     None   High        neutral   
106      MPC Member Lombardelli Speaks   None     None    Low        neutral   
107            TIC Long-Term Purchases   None    44.2B    Low        neutral   
108            FOMC Member Daly Speaks   None     None    Low        neutral   

    nlp_sentiment market_sentiment  
0 

In [20]:
df.to_csv('calender.csv')

In [21]:
import pickle

In [22]:
with open('combine_sentiment.pkl','wb') as file:
  pickle.dump(combine_sentiment,file)