In [79]:
import pandas as pd
import numpy as np
import seaborn as sns
from pandasql import sqldf
from sklearn import linear_model
import statsmodels.api as sm
from scipy import stats

In [80]:
test = pd.read_sas('3v3updated.sas7bdat', encoding='latin-1')
test

Unnamed: 0,GVKEY,DATADATE,FYEAR,LPERMNO,SIC,AJEX,ACT,AT,CEQ,CHE,...,XINT,XSGA,UTFDOC,CSHO,CONSOL,INDFMT,DATAFMT,POPSRC,CURCD,COSTAT
0,001000,1970-12-31,1970.0,25881.0,3089,1.000000,21.351,33.450,10.544,1.660,...,0.850,9.420,,2.446,C,INDL,STD,D,USD,I
1,001000,1971-12-31,1971.0,25881.0,3089,1.000000,19.688,29.330,8.381,2.557,...,1.117,10.548,,2.995,C,INDL,STD,D,USD,I
2,001000,1972-12-31,1972.0,25881.0,3089,1.000000,11.326,19.907,7.021,2.027,...,0.784,7.551,,2.902,C,INDL,STD,D,USD,I
3,001000,1973-12-31,1973.0,25881.0,3089,1.000000,12.969,21.771,8.567,1.357,...,0.705,8.532,,2.840,C,INDL,STD,D,USD,I
4,001000,1974-12-31,1974.0,25881.0,3089,1.000000,19.473,25.638,9.843,1.338,...,0.817,8.859,,2.150,C,INDL,STD,D,USD,I
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
350176,347007,2021-12-31,2021.0,15533.0,2836,1.000000,334.347,468.910,-242.173,317.116,...,14.849,,,397.830,C,INDL,STD,D,USD,A
350177,348892,2021-12-31,2021.0,20846.0,3550,0.100000,22.112,40.654,37.622,19.330,...,0.020,16.352,,12.577,C,INDL,STD,D,USD,A
350178,349972,2021-12-31,2021.0,15642.0,2836,1.000000,40.387,40.577,35.900,39.132,...,0.000,7.728,,8.259,C,INDL,STD,D,USD,A
350179,350681,2021-12-31,2021.0,22205.0,7374,1.000000,10243.231,10595.813,595.890,377.113,...,21.513,65.598,,933.361,C,INDL,STD,D,USD,I


### Noa and **dNoa**, (changes in) net operating assets
We measure net operating assets as operating assets minus operating liabilities. Operating assets are total assets (Compustat annual item AT) minus cash and short-term investment (item CHE). Operating liabilities are total assets minus debt included in current liabilities (item DLC, zero if missing), minus long-term debt (item DLTT, zero if missing), minus minority interests (item MIB, zero if missing), minus preferred stocks (item PSTK, zero if missing), and minus common equity (item CEQ). Noa is net operating assets scalded by 1-year-lagged total assets. Changes in net operating assets, dNoa, is the annual change in net operating assets scaled by 1-year-lagged total assets. At the end of June of each year t, we sort stocks into deciles based on Noa, and separately, on dNOA, for the fiscal year ending in calendar year t −1. Monthly decile returns are computed from July of year t to June of t +1, and the deciles are rebalanced in June of t +1

In [81]:
def fill_zero(var):
    temp = test[var].isna().sum()
    test[var].fillna(0, inplace=True)
    print("NAN values for " + str(var) + ": " + str(temp))
    temp = test[var].isna().sum()
    print("NAN values for " + str(var) + " filled: " + str(temp))

In [82]:
fill_zero("DLC")
fill_zero("DLTT")
fill_zero("MIB")
fill_zero("PSTK")

NAN values for DLC: 29818
NAN values for DLC filled: 0
NAN values for DLTT: 25976
NAN values for DLTT filled: 0
NAN values for MIB: 41252
NAN values for MIB filled: 0
NAN values for PSTK: 29876
NAN values for PSTK filled: 0


In [83]:
operating_assets = test["AT"] - test["CHE"]
operating_liabilities = test["AT"] - test["DLC"] - test["DLTT"] - test["MIB"] - test["PSTK"] - test["CEQ"]

Noa = operating_assets - operating_liabilities

lagged_total_assets = test["AT"].shift(1)  # 1-year-lagged total assets
dNoa = (Noa - Noa.shift(1)) / lagged_total_assets

test["dNoa"] = dNoa
test["dNoa"].fillna(0, inplace=True)

test["dNoa"]

0          0.000000
1         -0.095067
2         -0.238834
3          0.111318
4          0.101465
            ...    
350176     0.320635
350177    -0.144318
350178    -0.535396
350179    21.020208
350180    -0.080853
Name: dNoa, Length: 350181, dtype: float64

In [84]:
deciles_dNoa = pd.qcut(test["dNoa"], 10, labels=False, duplicates="drop")
nanvalues = deciles_dNoa.isna().sum()
allvalues = deciles_dNoa.count()

difference = nanvalues/allvalues
difference

  diff_b_a = subtract(b, a)


0.25000356959278086

In [85]:
# monthly_returns = pd.DataFrame()  # Assuming you have monthly returns data
# monthly_returns["Date"] = pd.date_range(start="2023-07-01", periods=12, freq="M")
# monthly_returns["Decile_dNoa"] = deciles_dNoa.values
# monthly_returns["Monthly_Return"] = 0.05

In [86]:
# test["dNoa"] = (test["AT"] - test["CHE"]) - (test["AT"] - test["DLC"] - test["DLTT"] - test["MIB"] - test["PSTK"] - test["CEQ"])
# test["dNoa"]

In [87]:
# test['dNoa_decile'] = pd.qcut(test['dNoa'], 10, labels=False)
# test['dNoa_decile']

### **Nsi**, net stock issues
At the end of June of year t, we measure net stock issues, Nsi, as the natural log of the ratio of the split-adjusted shares outstanding at the fiscal year ending in calendar year t −1 to the split-adjusted shares outstanding at the fiscal year ending in t −2. The split-adjusted shares outstanding is shares outstanding (Compustat annual item CSHO) times the adjustment factor (item AJEX). At the end of June of each year t, we sort stocks with negative Nsi into two portfolios (1 and 2), stocks with zero Nsi into 1 portfolio (3), and stocks with positive Nsi into seven portfolios (4 to 10). Monthly decile returns are from July of year t to June of t +1, and the deciles are rebalanced in June of t +1.

In [88]:
test["CSHO_t_minus_1_adjusted"] = test["CSHO"] * test["AJEX"]

test_shifted = test.shift(periods=1)
test["CSHO_t_minus_2_adjusted"] = test_shifted["CSHO"] * test_shifted["AJEX"]

test["Nsi"] = np.log(test["CSHO_t_minus_1_adjusted"] / test["CSHO_t_minus_2_adjusted"])
test["Nsi"].fillna(0, inplace=True)
test["Nsi"]

  result = getattr(ufunc, method)(*inputs, **kwargs)


0         0.000000
1         0.202490
2        -0.031544
3        -0.021596
4        -0.278336
            ...   
350176    2.898017
350177   -5.756740
350178    1.882019
350179    4.727489
350180   -9.722744
Name: Nsi, Length: 350181, dtype: float64

In [89]:
test["Nsi_f"] = np.where(test["Nsi"] < 0, np.where(test["Nsi"] < test["Nsi"].quantile(0.5), 1, 2),
                             np.where(test["Nsi"] == 0, 3,
                                      np.where(test["Nsi"] <= test["Nsi"].quantile(0.1), 4,
                                               np.where(test["Nsi"] <= test["Nsi"].quantile(0.2), 5,
                                                        np.where(test["Nsi"] <= test["Nsi"].quantile(0.3), 6,
                                                                 np.where(test["Nsi"] <= test["Nsi"].quantile(0.4), 7,
                                                                          np.where(test["Nsi"] <= test["Nsi"].quantile(0.5), 8,
                                                                                   np.where(test["Nsi"] <= test["Nsi"].quantile(0.6), 9, 10))))))))
test["Nsi_f"]

0          3
1         10
2          1
3          1
4          1
          ..
350176    10
350177     1
350178    10
350179    10
350180     1
Name: Nsi_f, Length: 350181, dtype: int32

### dNco, **dNca**, and dNcl, changes in net noncurrent operating assets, in noncurrent operating assets, and in noncurrent operating liabilities.
dNco is the change in net noncurrent operating assets. Net noncurrent operating assets are noncurrent operating assets (Nca) minus noncurrent operating liabilities (Ncl), with Nca = total assets (Compustat annual item AT) − current assets (item ACT) − long-term investments (item IVAO), and Ncl = total liabilities (item LT) − current liabilities (item LCT) − long-term debt (item DLTT). dNca is the change in noncurrent operating assets, and dNcl is the change in noncurrent operating liabilities. Missing changes in long-term investments and long-term debt are set to zero. At the end of June of each year t, we sort stocks into deciles based, on dNco, dNca, and dNcl for the fiscal year ending in calendar year t −1, all scaled by total assets for the fiscal year ending in calendar year t −2. Monthly decile returns are from July of year t to June of t +1, and the deciles are rebalanced in June of t +1.


In [90]:
fill_zero("IVAO")

NAN values for IVAO: 74024
NAN values for IVAO filled: 0


In [91]:
test["Nca"] = test["AT"] - test["ACT"] - test["IVAO"]
test["Nca"]

0          10.614
1           8.367
2           7.434
3           7.786
4           5.912
           ...   
350176    134.563
350177     17.187
350178      0.190
350179    352.582
350180      0.859
Name: Nca, Length: 350181, dtype: float64

In [92]:
#Need to calculate dNca
test["dNca"] = test["Nca"].diff()
test["dNca"].fillna(0, inplace=True)
test["dNca"]

0           0.000
1          -2.247
2          -0.933
3           0.352
4          -1.874
           ...   
350176     92.563
350177   -117.376
350178    -16.997
350179    352.392
350180   -351.723
Name: dNca, Length: 350181, dtype: float64

### dFin, dSti, dLti, **dFnl**, and dBe, changes in net financial assets, in short-term investments, in long-term investments, in financial liabilities, and in book equity. 
dFin is the change in net financial assets. Net financial assets are financial assets (Fna) minus financial liabilities (Fnl), with Fna = short-term investments (Compustat annual item IVST) + long-term investments (item IVAO), and Fnl = long-term debt (item DLTT) + debt in current liabilities (item DLC) + preferred stock (item PSTK). dSti is the change in short-term investments, dLti is the change in long-term investments, and dFnl is the change in financial liabilities. dBe is the change in book equity (item CEQ). Missing changes in debt in current liabilities, long-term investments, long-term debt, short-term investments, and preferred stocks are set to zero (at least 1 change must be non-missing). When constructing dSti (dLti), we exclude firms that do not have short-term (long-term) investments in the past two fiscal years. At the end of June of each year t, we sort stocks into deciles based, separately, on dFin, dSti, dLti, dFnl, and dBe for the fiscal year ending in calendar year t −1, all scaled by total assets (item AT) for the fiscal year ending in calendar year t −2. Monthly decile returns are calculated from July of year t to June of t +1, and the deciles are rebalanced in June of t +1. For sufficient data coverage, the dSti portfolios start in July 1971.

In [93]:
fill_zero("DLTT")
fill_zero("DLC")
fill_zero("PSTK")

NAN values for DLTT: 0
NAN values for DLTT filled: 0
NAN values for DLC: 0
NAN values for DLC filled: 0
NAN values for PSTK: 0
NAN values for PSTK filled: 0


In [94]:
test["Fnl"] = test["DLTT"] + test["DLC"] + test["PSTK"]
test["Fnl"]

0          13.295
1          13.175
2           7.000
3           7.000
4           7.914
           ...   
350176    645.664
350177      0.411
350178      0.169
350179    631.097
350180      0.000
Name: Fnl, Length: 350181, dtype: float64

In [95]:
test["dFnl"] = test["Fnl"].diff()
test["dFnl"].fillna(0, inplace=True)
test["dFnl"]

0           0.000
1          -0.120
2          -6.175
3           0.000
4           0.914
           ...   
350176    620.664
350177   -645.253
350178     -0.242
350179    630.928
350180   -631.097
Name: dFnl, Length: 350181, dtype: float64

In [96]:
# dlc = test['DLC'].isna().sum()
# test['DLC'].fillna(0, inplace=True)
# print("NAN values: " + str(dlc))
# dlc = test['DLC'].isna().sum()
# print("NAN values: " + str(dlc))

Credit default risk / credit risk premium

In [97]:
import pandas as pd

def calculate_credit_metrics(compustat_data):
    credit_metrics_df = pd.DataFrame()

    # Rename 
    credit_metrics_df["Total_Assets"] = compustat_data["AT"]
    credit_metrics_df["Current_Liabilities"] = compustat_data["LCO"]
    credit_metrics_df["Long_Term_Debt"] = compustat_data["DLTT"]
    credit_metrics_df["Total_Liabilities"] = compustat_data["LT"]
    credit_metrics_df["Preferred_Stock"] = compustat_data["PSTK"]
    credit_metrics_df["Interest_Expense"] = compustat_data["XINT"]
    credit_metrics_df["Cash_Short_Term_Investments"] = compustat_data["CHE"]
    credit_metrics_df["Net_Income"] = compustat_data["NI"]
    credit_metrics_df["Inventory"] = compustat_data["INVT"]
    credit_metrics_df["Total_Equity"] = compustat_data["CEQ"]

    # Calculate financial ratios
    credit_metrics_df["Debt_Equity_Ratio"] = credit_metrics_df["Total_Liabilities"] / credit_metrics_df["Total_Equity"]
    credit_metrics_df["Debt_Ratio"] = credit_metrics_df["Total_Liabilities"] / credit_metrics_df["Total_Assets"]
    credit_metrics_df["Current_Ratio"] = credit_metrics_df["Current_Liabilities"] / credit_metrics_df["Total_Assets"]
    credit_metrics_df["Quick_Ratio"] = (credit_metrics_df["Current_Liabilities"] - credit_metrics_df["Inventory"]) / credit_metrics_df["Total_Assets"]
    credit_metrics_df["Interest_Coverage_Ratio"] = credit_metrics_df["Net_Income"] / credit_metrics_df["Interest_Expense"]
    credit_metrics_df["Return_On_Assets"] = credit_metrics_df["Net_Income"] / credit_metrics_df["Total_Assets"]
    credit_metrics_df["Return_On_Equity"] = credit_metrics_df["Net_Income"] / credit_metrics_df["Total_Equity"]
    credit_metrics_df["Asset_Turnover_Ratio"] = credit_metrics_df["Net_Income"] / credit_metrics_df["Total_Assets"]
    credit_metrics_df["Inventory_Turnover_Ratio"] = credit_metrics_df["Net_Income"] / credit_metrics_df["Inventory"]

    return credit_metrics_df

calculated_credit_metrics = calculate_credit_metrics(test)
print(calculated_credit_metrics)


        Total_Assets  Current_Liabilities  Long_Term_Debt  Total_Liabilities  \
0             33.450                0.763           0.917             22.906   
1             29.330                1.195          10.318             20.948   
2             19.907                1.172           7.000             12.886   
3             21.771                0.826           7.000             13.204   
4             25.638                2.624           7.000             15.381   
...              ...                  ...             ...                ...   
350176       468.910               55.330         343.417            712.823   
350177        40.654                2.239           0.246              3.032   
350178        40.577                1.870           0.073              4.677   
350179     10595.813               95.997           7.499           9999.222   
350180         9.357                7.777           0.000              8.700   

        Preferred_Stock  Interest_Expen

In [98]:
def calculate_credit_score(credit_metrics_df):
    # Define weights for each financial metric
    weights = {
        "Debt_Equity_Ratio": 0.2,
        "Debt_Ratio": 0.1,
        "Current_Ratio": 0.1,
        "Quick_Ratio": 0.1,
        "Interest_Coverage_Ratio": 0.1,
        "Return_On_Assets": 0.1,
        "Return_On_Equity": 0.1,
        "Asset_Turnover_Ratio": 0.1,
        "Inventory_Turnover_Ratio": 0.1
    }

    # Normalize the metrics to ensure they are comparable
    # normalized_metrics = credit_metrics_df.div(credit_metrics_df.max())

    # Calculate the weighted sum of normalized metrics
    credit_score = (credit_metrics_df * pd.Series(weights)).sum(axis=1)

    return credit_score

credit_score = calculate_credit_score(calculated_credit_metrics)
print(credit_score)


0         0.556781
1         0.272252
2         0.804507
3         0.686285
4         0.710612
            ...   
350176        -inf
350177        -inf
350178        -inf
350179    5.029691
350180        -inf
Length: 350181, dtype: float64


  return umr_sum(a, axis, dtype, out, keepdims, initial, where)


higher credit score indicates lower credit risk

In [99]:
test["Credit Rating"] = credit_score
test["Credit Rating"]

0         0.556781
1         0.272252
2         0.804507
3         0.686285
4         0.710612
            ...   
350176        -inf
350177        -inf
350178        -inf
350179    5.029691
350180        -inf
Name: Credit Rating, Length: 350181, dtype: float64

In [100]:
print(test.columns)

Index(['GVKEY', 'DATADATE', 'FYEAR', 'LPERMNO', 'SIC', 'AJEX', 'ACT', 'AT',
       'CEQ', 'CHE', 'DCPSTK', 'DLC', 'DLTT', 'INVT', 'IVAO', 'LCO', 'LT',
       'MIB', 'PSTK', 'NI', 'XINT', 'XSGA', 'UTFDOC', 'CSHO', 'CONSOL',
       'INDFMT', 'DATAFMT', 'POPSRC', 'CURCD', 'COSTAT', 'dNoa',
       'CSHO_t_minus_1_adjusted', 'CSHO_t_minus_2_adjusted', 'Nsi', 'Nsi_f',
       'Nca', 'dNca', 'Fnl', 'dFnl', 'Credit Rating'],
      dtype='object')


In [101]:
selected = ["GVKEY", "DATADATE", "FYEAR", "LPERMNO", "dNoa", "Nsi", "dNca", "dFnl", "Credit Rating"]
result = test[selected].copy()
result

Unnamed: 0,GVKEY,DATADATE,FYEAR,LPERMNO,dNoa,Nsi,dNca,dFnl,Credit Rating
0,001000,1970-12-31,1970.0,25881.0,0.000000,0.000000,0.000,0.000,0.556781
1,001000,1971-12-31,1971.0,25881.0,-0.095067,0.202490,-2.247,-0.120,0.272252
2,001000,1972-12-31,1972.0,25881.0,-0.238834,-0.031544,-0.933,-6.175,0.804507
3,001000,1973-12-31,1973.0,25881.0,0.111318,-0.021596,0.352,0.000,0.686285
4,001000,1974-12-31,1974.0,25881.0,0.101465,-0.278336,-1.874,0.914,0.710612
...,...,...,...,...,...,...,...,...,...
350176,347007,2021-12-31,2021.0,15533.0,0.320635,2.898017,92.563,620.664,-inf
350177,348892,2021-12-31,2021.0,20846.0,-0.144318,-5.756740,-117.376,-645.253,-inf
350178,349972,2021-12-31,2021.0,15642.0,-0.535396,1.882019,-16.997,-0.242,-inf
350179,350681,2021-12-31,2021.0,22205.0,21.020208,4.727489,352.392,630.928,5.029691
