In [1]:
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
import numpy as np

In [2]:
firm_data = pd.read_excel('Firm_default.xlsx')

In [3]:
firm_data.columns =['Company','Default','CF_coverage','RoA','liquidity','asset_turnover']

In [4]:
firm_data=firm_data.dropna()
firm_data

Unnamed: 0,Company,Default,CF_coverage,RoA,liquidity,asset_turnover
0,1,1,-45.0,-41.0,109.0,222.222222
1,2,1,-56.0,-31.0,151.0,625.0
2,3,1,6.0,2.0,101.0,250.0
3,4,1,-7.0,-9.0,145.0,384.615385
4,5,1,-10.0,-9.0,156.0,149.253731
5,6,1,-14.0,-7.0,71.0,357.142857
6,7,1,4.0,1.0,150.0,140.84507
7,8,1,-6.0,-6.0,137.0,250.0
8,9,1,7.0,-1.0,137.0,294.117647
9,10,1,-13.0,-14.0,142.0,227.272727


In [5]:
firm_data.describe()

Unnamed: 0,Company,Default,CF_coverage,RoA,liquidity,asset_turnover
count,46.0,46.0,46.0,46.0,46.0,46.0
mean,23.5,0.456522,9.695652,-0.695652,203.347826,289.745962
std,13.422618,0.50361,26.05112,12.395106,100.652806,160.346884
min,1.0,0.0,-56.0,-41.0,33.0,105.263158
25%,12.25,0.0,-6.75,-5.25,137.0,182.659933
50%,23.5,0.0,12.0,3.5,193.5,232.683983
75%,34.75,1.0,21.5,7.0,242.5,351.190476
max,46.0,1.0,58.0,14.0,506.0,769.230769


In [6]:
firm_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46 entries, 0 to 45
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Company         46 non-null     int64  
 1   Default         46 non-null     int64  
 2   CF_coverage     46 non-null     float64
 3   RoA             46 non-null     float64
 4   liquidity       46 non-null     float64
 5   asset_turnover  46 non-null     float64
dtypes: float64(4), int64(2)
memory usage: 2.3 KB


In [7]:
# a) Run a logistic regression using asset_turnover as the only independent variable. Write out the fitted regression equation

model=smf.glm("Default ~ asset_turnover", data=firm_data,family=sm.families.Binomial()).fit()
print(model)

<statsmodels.genmod.generalized_linear_model.GLMResultsWrapper object at 0x000001F4C72B2A90>


In [8]:
model.summary()

0,1,2,3
Dep. Variable:,Default,No. Observations:,46.0
Model:,GLM,Df Residuals:,44.0
Model Family:,Binomial,Df Model:,1.0
Link Function:,Logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-31.711
Date:,"Sun, 19 Nov 2023",Deviance:,63.421
Time:,15:23:19,Pearson chi2:,46.0
No. Iterations:,4,Pseudo R-squ. (CS):,3.103e-06
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.1679,0.617,-0.272,0.785,-1.376,1.040
asset_turnover,-2.23e-05,0.002,-0.012,0.990,-0.004,0.004


In [9]:
print(model.params)

Intercept        -0.167892
asset_turnover   -0.000022
dtype: float64


In [10]:
fz = 'The fitted regression model is ' + str(round(model.params[0],3)) + ' + ' + str(round(model.params[1],6)) +'*asset_turnover'
print(fz)

The fitted regression model is -0.168 + -2.2e-05*asset_turnover


In [11]:
# b) Comment on the economic significance of your independent variable even if the coefficient of the variable is statistically insignificant. [Hint: You may need to do some calculations here.]

# Odds ratio is the exponential of β
# An increase of 1 in asset turnover decreases the odds ratio by %2.23e-05
# P value is 0.990. It is greater than 0.05 so it is insignificant.
# The coefficient of asset_turnover is not statistically significant at 5% significance level.

upper_bound=-2.23e-05+(1.96*0.002)
lower_bound=-2.23e-05-(1.96*0.002)

odds_ratios = np.exp(model.params[1])
print('Odds ratio is ' + str(odds_ratios))
print('%95 confidence interval for the coefficient of asset_turnover is between '+ str(lower_bound) + ' and ' + str(upper_bound))

Odds ratio is 0.9999776981994056
%95 confidence interval for the coefficient of asset_turnover is between -0.0039423 and 0.0038977


In [12]:
# c) Run a logistic regression with the following independent variables: 
# CF_coverage, RoA, liquidity, asset_turnover.

model2=smf.glm("Default ~ CF_coverage + RoA + liquidity + asset_turnover", data=firm_data, family=sm.families.Binomial()).fit()
print(model2)

<statsmodels.genmod.generalized_linear_model.GLMResultsWrapper object at 0x000001F4C6949690>


In [13]:
model2.summary()

0,1,2,3
Dep. Variable:,Default,No. Observations:,46.0
Model:,GLM,Df Residuals:,41.0
Model Family:,Binomial,Df Model:,4.0
Link Function:,Logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-13.981
Date:,"Sun, 19 Nov 2023",Deviance:,27.962
Time:,15:23:23,Pearson chi2:,57.4
No. Iterations:,7,Pseudo R-squ. (CS):,0.5374
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,7.5999,3.041,2.499,0.012,1.640,13.559
CF_coverage,-0.0758,0.059,-1.280,0.201,-0.192,0.040
RoA,0.0467,0.134,0.350,0.726,-0.215,0.308
liquidity,-0.0345,0.012,-2.781,0.005,-0.059,-0.010
asset_turnover,-0.0032,0.004,-0.755,0.450,-0.012,0.005


In [14]:
print(model2.params)

Intercept         7.599872
CF_coverage      -0.075824
RoA               0.046747
liquidity        -0.034478
asset_turnover   -0.003230
dtype: float64


In [15]:
#Given the regression result what is the probability of default for a firm with the following characteristics:
#CF_coverage is 12 , RoA is 7 , liquidity is 100 , asset_turnover is 400.

In [16]:
#CF_coverage is 12, RoA is 7 , liquidity is 100 , asset_turnover is 400.
1/(1 + np.exp(-model2.params[0]-model2.params[1]*12-model2.params[2]*7-model2.params[3]*100-model2.params[4]*400))

0.9069943744209957