In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.insert(0, '../../../../')
from helpers.ipython_helpers import (
    print_full
)
from helpers.rating_model_helpers import (
    RatingModelTrainer
)
import matplotlib.pyplot as plt
import matplotlib
from sklearn.metrics import (
    f1_score,
    fbeta_score,
    recall_score,
    precision_score,
    roc_curve,
    roc_auc_score,
    classification_report,
    confusion_matrix
)
import itertools
matplotlib.style.use('ggplot')
%matplotlib inline

In [2]:
def get_base_predictions(df):
    """Build a naive persistence baseline, one prediction per row of ``df``.

    Rows are walked in their existing order. For the first row of every run
    of identical ``GVKEY`` values there is no earlier observation, so the
    most frequent value of ``next_is_investment_grade`` in ``df`` is used.
    For every later row of the run, the prediction is the outcome recorded on
    the *previous* row of that run.

    Args:
        df: DataFrame with ``GVKEY`` and ``next_is_investment_grade`` columns.
            Rows of the same ``GVKEY`` are expected to be adjacent and in
            chronological order (not checked here).

    Returns:
        list: one prediction per row, in row order. Fallback entries are
        ``int`` (0/1); carried-forward entries keep the column's own values.
        An empty ``df`` yields an empty list.
    """
    outcomes = df['next_is_investment_grade']
    if len(df) == 0:
        return []

    default_prediction = int(outcomes.mode()[0])
    base_predictions = []
    prev_gvkey = None
    prev_outcome = None

    for gvkey, outcome in zip(df['GVKEY'], outcomes):
        # `base_predictions` is empty only on the very first row, which can
        # never have a predecessor regardless of what its GVKEY compares to.
        if base_predictions and prev_gvkey == gvkey:
            # Use the previous row's outcome, never this row's own value:
            # reading the current row's `next_is_investment_grade` would hand
            # the baseline the very value it is supposed to predict.
            base_predictions.append(prev_outcome)
        else:
            base_predictions.append(default_prediction)
        prev_gvkey = gvkey
        prev_outcome = outcome

    return base_predictions
      
def get_random_predictions(df, test_df, random_state=None):
    """Draw random class predictions that follow the class mix seen in ``df``.

    The relative frequency of each non-null value of ``is_investment_grade``
    in ``df`` is used as its sampling probability, and one value is drawn
    independently for every row of ``test_df``.

    Args:
        df: DataFrame whose ``is_investment_grade`` column supplies the
            class frequencies.
        test_df: DataFrame that only determines how many predictions to draw
            (``test_df.shape[0]``).
        random_state: Optional seed or ``numpy.random.RandomState`` instance
            for reproducible draws. ``None`` (the default) keeps using
            NumPy's global random state.

    Returns:
        numpy.ndarray: ``test_df.shape[0]`` sampled class values.

    Raises:
        ValueError: propagated from NumPy when ``df`` has no non-null
            ``is_investment_grade`` values to sample from.
    """
    # normalize=True divides by the number of non-null observations, so the
    # probabilities sum to 1 even when the column contains missing values
    # (dividing by the total row count would not, and `choice` would raise).
    class_frequencies = df['is_investment_grade'].value_counts(normalize=True)

    if isinstance(random_state, np.random.RandomState):
        rng = random_state
    elif random_state is None:
        rng = np.random  # module-level functions share the global state
    else:
        rng = np.random.RandomState(random_state)

    return rng.choice(
        class_frequencies.index.tolist(),
        size=test_df.shape[0],
        p=class_frequencies.tolist(),
    )

In [3]:
# Load the transformed annual credit-rating features and discard the two
# leftover "Unnamed" columns in a single chained expression.
original_features_df = (
    pd.read_csv("../../../../../data/credit_rating/intermediate_data/transformed_annual_credit_rating_features.csv")
    .drop(columns=["Unnamed: 0", "Unnamed: 0.1"])
)

In [7]:
# Preview the first 30 rows of the loaded feature table.
original_features_df.head(30)

Unnamed: 0,GVKEY,datadate,company_name,year,month,CASHMTA_win,EXRET_AVG_win,MB_win,NIMTA_AVG_win,PRICE_win,...,TLMTA_win,issuer_rating,ranking,month_diff,next_rating,next_ranking,is_investment_grade,next_is_investment_grade,broad_ranking,next_broad_ranking
0,1004,1987-05-31,AAR CORP,1987,5,0.008611,0.034983,1.708065,0.026351,2.70805,...,0.265162,BBB,9,0,BBB,9.0,True,True,2,2
1,1004,1988-05-31,AAR CORP,1988,5,0.008763,0.050985,2.023136,0.026141,2.70805,...,0.266415,BBB,9,12,BBB,9.0,True,True,2,2
2,1004,1989-05-31,AAR CORP,1989,5,0.008032,0.004447,2.054581,0.02401,2.70805,...,0.281037,BBB,9,24,BBB,9.0,True,True,2,2
3,1004,1990-05-31,AAR CORP,1990,5,0.011098,-0.022577,1.590414,0.021466,2.70805,...,0.362615,BBB,9,36,BBB,9.0,True,True,2,2
4,1004,1991-05-31,AAR CORP,1991,5,0.012673,-0.021842,1.052213,0.021802,2.555287,...,0.483778,BBB,9,48,BBB,9.0,True,True,2,2
5,1004,1992-05-31,AAR CORP,1992,5,0.006178,-0.004513,1.100779,0.018469,2.639057,...,0.473193,BBB,9,60,BBB,9.0,True,True,2,2
6,1004,1993-05-31,AAR CORP,1993,5,0.010168,-0.018268,1.052478,0.012519,2.545531,...,0.478662,BBB,9,72,BBB,9.0,True,True,2,2
7,1004,1994-05-31,AAR CORP,1994,5,0.016633,0.027748,1.242187,0.008947,2.70805,...,0.467887,BBB,9,84,BBB-,10.0,True,True,2,2
8,1004,1995-05-31,AAR CORP,1995,5,0.021099,-0.038207,1.102044,0.011313,2.621039,...,0.512986,BBB-,10,96,BBB-,10.0,True,True,2,2
9,1004,1996-05-31,AAR CORP,1996,5,0.050162,0.006962,1.4341,0.015361,2.70805,...,0.409368,BBB-,10,108,BBB-,10.0,True,True,2,2


In [4]:
# Count how many rows fall into each value of `next_is_investment_grade`.
original_features_df['next_is_investment_grade'].value_counts()

True     16370
False    14543
Name: next_is_investment_grade, dtype: int64

In [None]:
# Hand the loaded feature table to the project's RatingModelTrainer helper.
trainer = RatingModelTrainer(original_features_df)

In [None]:
# Have the trainer build its dev/test frames (used below as trainer.dev_df,
# trainer.test_df and trainer.y_test).
# NOTE(review): presumably rows are split around threshold_year=2005 and the
# target is derived from 'is_investment_grade' — confirm in RatingModelTrainer.
trainer.make_dev_and_test_df(threshold_year=2005, output_type='is_investment_grade')

In [None]:
# Compute the baseline from get_base_predictions on the test frame and report
# it against the trainer's test targets.
base_predictions = get_base_predictions(trainer.test_df)
trainer.display_results(trainer.y_test, base_predictions)

In [None]:
# Short alias for the trainer's dev frame; the next cell reads it as `df`.
df = trainer.dev_df

In [None]:
# Share of dev rows carrying each `is_investment_grade` value
# (per-value counts divided by the total number of dev rows).
series = df['is_investment_grade'].value_counts().div(len(df))

In [None]:
# Show the class labels next to their computed shares.
series.index, series.values

In [None]:
# Seed NumPy's global generator immediately before sampling so this cell
# produces the same random baseline on every Restart & Run All.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
random_predictions = get_random_predictions(trainer.dev_df, trainer.test_df)

In [None]:
# `test_df` was never bound earlier in the notebook, so define it here.
# random_predictions has one entry per row of trainer.test_df, so that is the
# frame the column belongs to; copying keeps the trainer's own frame untouched
# and makes this cell safe to re-run.
# NOTE(review): confirm no other preprocessing of test_df was intended.
test_df = trainer.test_df.copy()
test_df['predicted_is_investment_grade'] = random_predictions

In [None]:
# Keep only rows where the current grade differs from the next grade, or
# where it differs from the predicted grade.
current_grade = test_df['is_investment_grade']
grade_changed = current_grade != test_df['next_is_investment_grade']
prediction_differs = current_grade != test_df['predicted_is_investment_grade']
delta_df = test_df[grade_changed | prediction_differs]

In [None]:
# Report the random predictions against `next_is_investment_grade`,
# restricted to the rows selected into delta_df.
trainer.display_results(delta_df['next_is_investment_grade'], delta_df['predicted_is_investment_grade'])