# Code for the 2022 IMI Big Data and Artificial Intelligence Case Competition
In this repository, we conduct several analyses to predict the probability of corporate default from financial data of Canadian medium-sized entities. Any sensitive information (e.g., customer ID info) has been removed or modified as necessary.
The three primary methods used to tackle this problem are logistic regression, support vector machine (SVM), and XGBoost. They are applied to the following specific problems:
1. Binary classification to predict good (investment grade) vs bad (non-investment grade) credit rating
2. Multi-class classification to predict the credit rating of a medium-sized entity, ranging from 1 (the best credit and least likely to default on a loan) to 17 (the worst credit and most likely to default on a loan)

# Major highlights of results
1. Cleaned the data by removing missing values and outliers
2. Engineered new features and identified key features using recursive feature elimination
3. Achieved good performance for both binary and multiclass classification through hyperparameter tuning
4. Discovered data structure using clustering analysis


In [1]:
## Importing necessary packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import roc_curve, roc_auc_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold

# Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold

In [None]:
# Preview the data_rest frame (the notebook renders it as the cell output).
data_rest

Unnamed: 0,CUSTOMER_ID,Corp_Residence_Country_Code,BR Code,Final_IG,B_PLUS_FLAG,EBITDA_Y0,TOTALASSET_Y0,TOTALDEBT_Y0,TOTALREVENUE_Y0,CURRENTASSET_Y0,FIXEDASSET_Y0,CURRENTLIABILITY_Y0,NONCURRLIA_Y0,TOTALEQUITY_Y0,TOTALNETWORTH_Y0,CAPITALIZATION_Y0,TOTINTEXP_Y0,LEASERENTEXP_Y0,EBITDAR_Y0,receivabledays_Y0,INVENTORYDAYS_Y0,payableDAYS_Y0,Capex2Dep_Y0,tangibleNetWorth_Y0,FIXEDCHARGECOV_Y0,DEBTSERVCOV_Y0,NETPROFIT_Y0,NETSALES_Y0,ASSETTURNOVER_Y0,OPERPROFIT_Y0,ARTurnover_Y0,Inventory_Y0,RETAINED_EARNINGS_Y0,FCF_Y0,CCE_Y0,CURRENT_RATIO,ACID_RATIO,CASH_RATIO,CASH_RATIO2,WCL,LEVERAGE,SR,SBTL,FCF,OPE,ROA,OROA,ROE,CAPTIAL_STRUCTURE,SHORTTERM_LEVERAGE,LIQUIDITY,DEBT_SERVICE_CAPACITY,EFFICIENCY,ACID,DEBT_COVERAGE,EBITDA_RATIO
15064,7197,0,12,5,1,584049.283266,581480.519228,579760.653515,598072.539861,631202.637553,617620.219257,588931.143637,586939.385326,622291.098255,589093.172412,589688.943720,592642.247410,593908.557832,589321.799493,592594.608167,6.019521e+05,6.026610e+05,595788.665099,575232.681279,580132.634373,618102.843472,581547.797375,581918.094286,613449.380181,602830.813185,615353.057205,614218.349444,599429.779447,596989.928655,574070.138920,1.071777,0.028839,1.025887,2.048305,0.500847,0.494511,0.938547,0.961082,0.507700,1.007956,1.000753,1.036717,0.935122,1.070184,0.999725,0.987256,0.494568,0.972370,2.019630,0.985501,1.003662
1165,2169,0,2,6,1,585086.697359,623001.989833,615216.768985,591032.019597,602671.213574,597622.175921,629404.268510,594071.199825,627349.916649,602893.916606,624545.642651,573853.085792,621679.205377,606193.615732,575788.400609,1.035435e+09,1.029303e+09,620666.005871,615684.721347,616960.232772,618414.513239,608970.883017,577912.503505,606628.719787,585701.182813,601322.337239,626096.640888,605027.400110,596928.965583,595995.720479,0.957526,-0.037218,1.056055,2.052826,0.514440,0.509207,0.932632,0.942109,0.487896,0.990980,0.927625,0.940127,0.921196,1.006979,1.043972,0.956651,0.497739,1.030352,1.902304,1.019576,1.012414
5212,13211,0,12,6,1,572075.240846,590811.152226,626769.664251,591813.100069,571870.103470,622088.519165,577338.499861,618360.331411,608600.325876,603791.147632,602047.158260,623998.907729,580915.073078,585198.844075,578308.651829,6.231224e+05,5.879037e+05,575917.297253,628511.589031,586941.828106,603385.843622,624524.640624,619877.432318,613568.666909,597301.266040,584117.678813,607645.471482,620481.811848,579650.347092,621500.157788,0.990528,-0.061966,0.928943,1.923891,0.482846,0.494114,0.939985,0.972417,0.484780,1.009273,1.049197,1.010985,1.018530,1.030110,0.956189,1.051944,0.522309,1.055273,2.088234,0.916789,0.922884
13076,13642,0,49,7,1,584288.546366,582602.991331,577695.450678,592999.228539,623671.810840,577215.417814,584671.424858,608165.004919,613583.836873,592663.877103,628351.576779,619707.686144,613526.504563,618348.501069,613928.092168,5.850652e+05,5.766997e+05,601659.560944,586268.423533,571706.929167,601547.147572,597664.597843,592953.391108,606241.374751,610570.020399,606008.957714,611357.445966,627008.086584,581413.481534,590361.759532,1.066705,0.021062,0.990361,2.020518,0.490152,0.488418,0.952255,0.966452,0.487421,1.029630,1.017766,1.048004,0.966377,1.053177,0.986514,1.013317,0.501045,1.007867,2.046227,0.942845,0.985387
15344,1929,0,0,8,1,598068.016018,572554.806166,578025.964947,594390.977992,573773.998111,576707.764476,616206.363499,616723.838948,594588.031273,590050.901191,610097.033760,603012.375167,593276.168072,571281.959740,586606.828204,9.526740e+08,9.909234e+08,573297.701796,613822.180836,628427.164183,609245.892155,613713.913580,591951.699520,590717.802557,602362.629600,615973.143708,617496.814174,625003.525181,607558.197642,579851.430042,0.931139,-0.070955,1.062697,2.126286,0.499790,0.464385,1.005853,0.999669,0.492776,1.013411,1.033878,1.052061,0.995566,1.038482,1.044327,1.012744,0.497769,1.032509,1.940624,0.991801,1.010332
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11638,2175,1,5,6,1,619635.418356,579351.266748,618265.229811,617254.404579,594332.117099,594739.434661,609477.020588,581545.897906,623527.163387,583953.694721,627300.695876,589529.354124,618551.133263,627667.518474,571784.702346,5.856975e+05,5.980062e+05,590362.816110,624130.586996,587835.551457,586464.965890,591933.666617,608715.854751,585501.055011,601064.462446,570599.865622,621016.924034,600501.393452,608904.595630,593623.274962,0.975151,-0.043783,1.026707,2.006362,0.511726,0.486432,0.993758,0.989940,0.511245,0.973771,1.050685,1.037478,0.976246,1.076251,1.043708,1.024634,0.496996,0.958978,1.910200,1.051068,1.017939
16843,349,1,5,8,1,608613.889184,589910.085429,581658.600057,623809.425121,609269.453401,616088.076776,611393.195237,592318.925320,591642.276541,607938.516186,576193.280618,609668.984031,571455.065829,584833.996005,599175.602574,9.693433e+08,1.017279e+09,601693.194553,602367.123875,573808.789396,620942.225323,581816.025252,601130.662609,570405.835615,612251.027533,569408.868181,614890.861986,598368.132861,582000.791172,598772.706552,0.996526,-0.009194,1.021077,2.010299,0.507923,0.490076,1.028686,1.054369,0.483505,0.981471,1.019021,1.037872,1.016037,1.002936,1.005683,1.015024,0.483351,0.932682,1.910688,0.998269,1.012449
15826,6876,0,0,7,1,572241.028270,624116.665341,584183.066034,617153.857710,616122.552865,599720.773547,600414.084436,606702.643082,601608.626481,569952.262044,582590.368436,580765.766582,602668.191877,571439.565436,595744.037848,5.868190e+05,5.983270e+05,573488.941474,584060.791694,612908.502237,575764.390958,570756.382470,572546.379066,603084.704692,625553.860513,621832.182726,625772.265396,592393.038924,620667.809004,615567.850755,1.026163,-0.016072,0.975382,1.960981,0.497395,0.517031,0.951185,1.025839,0.514174,1.013611,0.917371,1.002303,0.951692,0.963936,1.053446,0.986303,0.472826,0.924820,2.060911,0.985322,0.999467
4743,4815,0,12,6,1,580794.797106,632422.856322,584455.451951,589059.432845,621664.619957,631637.823536,599517.885605,620519.401687,604594.912514,615105.152801,601660.922404,604727.036299,595056.273802,569372.843969,625462.029865,5.957418e+05,5.974133e+05,588866.763177,599785.331550,575522.141240,627531.807464,622575.366898,618710.597667,613249.015206,600840.728924,617964.028240,626669.056076,576452.065449,584066.444979,612511.112591,1.036941,-0.008347,0.978787,1.991861,0.491393,0.518364,0.960635,0.974304,0.478728,1.020000,0.978318,0.950062,1.023347,0.955998,0.974659,0.968515,0.510292,1.056897,2.052441,0.960425,0.938718


In [None]:
# Design matrix: the selected feature columns minus the customer identifier
# and the binary target.
X = data_rest[features].drop(['CUSTOMER_ID', 'B_PLUS_FLAG'], axis=1)
# Target as a 1-D Series (single brackets), consistent with the final-model
# cell. A one-column DataFrame makes scikit-learn emit a
# DataConversionWarning on every fit of the grid search.
y = data_rest['B_PLUS_FLAG']

In [None]:
# Standardise the numeric predictors and one-hot encode the categorical codes.
# Assumes the first two columns of X are the categorical codes
# (Corp_Residence_Country_Code, BR Code) and everything after them is numeric.
# NOTE(review): the scaler is fit on every row before cross-validation, so the
# validation folds influence the scaling; a Pipeline would avoid that.
columns = X.columns
scaler = StandardScaler()
X[columns[2:]] = scaler.fit_transform(X[columns[2:]])
# Name the columns to encode explicitly: without `columns=`, get_dummies only
# converts object/string/category columns, so integer-typed codes would be
# silently left as ordinal numbers.
X = pd.get_dummies(X, columns=list(columns[:2]))
X

Unnamed: 0,TOTALASSET_Y0,TOTALREVENUE_Y0,CURRENTLIABILITY_Y0,TOTALEQUITY_Y0,TOTALNETWORTH_Y0,INVENTORYDAYS_Y0,payableDAYS_Y0,OPERPROFIT_Y0,SBTL,OPE,OROA,SHORTTERM_LEVERAGE,Corp_Residence_Country_Code_0,Corp_Residence_Country_Code_1,BR Code_0,BR Code_1,BR Code_2,BR Code_3,BR Code_4,BR Code_5,BR Code_6,BR Code_7,BR Code_8,BR Code_9,BR Code_10,BR Code_11,BR Code_12,BR Code_13,BR Code_14,BR Code_15,BR Code_16,BR Code_18,BR Code_19,BR Code_20,BR Code_21,BR Code_22,BR Code_23,BR Code_24,BR Code_25,BR Code_26,...,BR Code_48,BR Code_49,BR Code_50,BR Code_51,BR Code_52,BR Code_53,BR Code_54,BR Code_55,BR Code_56,BR Code_57,BR Code_58,BR Code_59,BR Code_60,BR Code_62,BR Code_63,BR Code_64,BR Code_66,BR Code_67,BR Code_68,BR Code_69,BR Code_71,BR Code_72,BR Code_74,BR Code_75,BR Code_76,BR Code_77,BR Code_78,BR Code_80,BR Code_81,BR Code_82,BR Code_83,BR Code_87,BR Code_90,BR Code_91,BR Code_92,BR Code_93,BR Code_94,BR Code_96,BR Code_101,BR Code_107
15064,-1.221781,-0.481644,-0.675530,1.206254,-0.704857,-0.439775,-0.439766,0.235864,-1.159346,0.512906,1.100794,0.005765,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1165,0.857416,-0.810716,1.619715,1.490927,0.069063,2.362834,2.352091,-0.744955,-1.580262,0.128767,-1.137997,1.083693,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5212,-0.754548,-0.774209,-1.332953,0.435838,0.119378,-0.439718,-0.439806,-0.080750,-0.907873,0.542719,0.504368,-1.054842,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13076,-1.165573,-0.718770,-0.917101,0.716273,-0.504618,-0.439821,-0.439836,0.679000,-1.040206,1.003374,1.362395,-0.316067,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
15344,-1.668739,-0.653719,0.871257,-0.352671,-0.651149,2.138695,2.247930,0.209056,-0.303278,0.636357,1.456441,1.092357,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11638,-1.328404,0.414911,0.489633,1.275811,-0.993069,-0.439819,-0.439778,0.134725,-0.519113,-0.260664,1.118440,1.077260,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
16843,-0.799669,0.721292,0.598300,-0.518437,0.351955,2.183840,2.319458,0.775253,0.910281,-0.086416,1.127556,0.150903,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
15826,0.913234,0.410212,-0.024329,0.042396,-1.778244,-0.439816,-0.439777,1.536954,0.277334,0.640869,0.303126,1.314506,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4743,1.329169,-0.902915,-0.075153,0.210443,0.753847,-0.439792,-0.439780,0.121914,-0.865995,0.785453,-0.907735,-0.604879,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Candidate hyperparameter values for the logistic-regression grid search.
# NOTE(review): not every solver/penalty pair is supported by scikit-learn
# (e.g. 'lbfgs' with 'l1'); the search cell sets error_score=0, so such fits
# score 0 instead of aborting the search.
c_values = [100, 10, 1.0, 0.1, 0.01]
penalty = ['none', 'l1', 'l2', 'elasticnet']
solvers = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']

# Base estimator; class_weight='balanced' reweights classes by frequency.
model = LogisticRegression(class_weight='balanced', max_iter=10000)

In [None]:
# Exhaustive search over solver x penalty x C, scored by ROC AUC.
grid = {'solver': solvers, 'penalty': penalty, 'C': c_values}

# 10 stratified folds, a single repeat, fixed seed for reproducibility.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=1, random_state=42)

grid_search = GridSearchCV(
    estimator=model,
    param_grid=grid,
    scoring='roc_auc',
    cv=cv,
    n_jobs=-1,       # use all available cores
    error_score=0,   # a failing fit scores 0 instead of raising
    verbose=True,
)
grid_result = grid_search.fit(X, y)

In [None]:
# Mean cross-validated score (scoring='roc_auc') of the best parameter combination.
grid_result.best_score_

0.6373929276665284

In [None]:
# Parameter combination that achieved the best cross-validated score.
grid_result.best_params_

{'C': 1.0, 'penalty': 'l2', 'solver': 'lbfgs'}

# Final model - binary

Use the parameters and features to run a final model on all of the data

In [None]:
# Restrict the full dataset to the selected columns for the final binary fit.
data_final = data_short.loc[:, features]
data_final

Unnamed: 0,CUSTOMER_ID,Corp_Residence_Country_Code,BR Code,B_PLUS_FLAG,TOTALASSET_Y0,TOTALREVENUE_Y0,CURRENTLIABILITY_Y0,TOTALEQUITY_Y0,TOTALNETWORTH_Y0,INVENTORYDAYS_Y0,payableDAYS_Y0,OPERPROFIT_Y0,SBTL,OPE,OROA,SHORTTERM_LEVERAGE
0,1576,1,5,1,619611.510181,611355.411490,578103.988392,596259.219747,578045.252633,5.904728e+05,5.829171e+05,575870.723892,1.025318,0.941957,0.929406,1.000102
1,648,0,12,1,608391.483917,608591.366569,603655.551037,577735.145730,585900.805747,5.837973e+05,6.235412e+05,613853.875298,1.053409,1.008647,1.008978,1.030303
2,13395,0,19,1,591791.219692,615999.168162,620046.519867,601679.290137,609409.181664,5.756264e+05,6.270374e+05,573787.307640,1.023800,0.931474,0.969577,1.017455
3,13610,0,21,1,605734.046655,580268.738890,594459.641503,619479.765547,604536.927056,6.029364e+05,5.912152e+05,623917.451440,0.936703,1.075222,1.030019,0.983331
4,11392,1,5,1,622028.317955,620551.783461,592262.918727,583194.794576,621499.359273,5.791048e+05,6.255971e+05,586550.570176,1.064056,0.945208,0.942964,0.952958
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16993,9483,1,5,1,597402.203485,583882.137797,628568.237529,579529.000652,602542.832781,9.827803e+08,9.620414e+08,609391.425458,1.007512,1.043689,1.020069,1.043193
16994,3969,0,32,1,628492.484307,597562.025539,585213.601861,585022.194173,604997.849904,6.193316e+05,6.015298e+05,578315.986698,1.021435,0.967792,0.920164,0.967299
16995,1190,0,4,0,594550.303034,583051.296070,578930.931594,598561.236938,601354.290649,6.038162e+05,6.180281e+05,563966.077578,0.974088,0.967267,0.948559,0.962712
16996,11429,1,5,1,588957.530051,575155.953282,626738.433905,616227.480258,599080.625400,5.910354e+05,5.782919e+05,583940.329983,0.933350,1.015273,0.991481,1.046167


In [None]:
# Predictors: drop the customer identifier and the target. The target is kept
# as a 1-D Series.
X = data_final.drop(['CUSTOMER_ID', 'B_PLUS_FLAG'], axis=1)
y = data_final['B_PLUS_FLAG']

# Standardise the numeric predictors. Assumes the first two columns of X are
# the categorical codes (Corp_Residence_Country_Code, BR Code) and the rest
# are numeric.
X[X.columns[2:]] = StandardScaler().fit_transform(X[X.columns[2:]])
# Encode the two code columns explicitly: without `columns=`, get_dummies only
# converts object/string/category columns, so integer-typed codes would be
# silently left as ordinal numbers.
X = pd.get_dummies(X, columns=list(X.columns[:2]))
X

Unnamed: 0,TOTALASSET_Y0,TOTALREVENUE_Y0,CURRENTLIABILITY_Y0,TOTALEQUITY_Y0,TOTALNETWORTH_Y0,INVENTORYDAYS_Y0,payableDAYS_Y0,OPERPROFIT_Y0,SBTL,OPE,OROA,SHORTTERM_LEVERAGE,Corp_Residence_Country_Code_0,Corp_Residence_Country_Code_1,BR Code_0,BR Code_1,BR Code_2,BR Code_3,BR Code_4,BR Code_5,BR Code_6,BR Code_7,BR Code_8,BR Code_9,BR Code_10,BR Code_11,BR Code_12,BR Code_13,BR Code_14,BR Code_15,BR Code_16,BR Code_18,BR Code_19,BR Code_20,BR Code_21,BR Code_22,BR Code_23,BR Code_24,BR Code_25,BR Code_26,...,BR Code_48,BR Code_49,BR Code_50,BR Code_51,BR Code_52,BR Code_53,BR Code_54,BR Code_55,BR Code_56,BR Code_57,BR Code_58,BR Code_59,BR Code_60,BR Code_62,BR Code_63,BR Code_64,BR Code_66,BR Code_67,BR Code_68,BR Code_69,BR Code_71,BR Code_72,BR Code_74,BR Code_75,BR Code_76,BR Code_77,BR Code_78,BR Code_80,BR Code_81,BR Code_82,BR Code_83,BR Code_87,BR Code_90,BR Code_91,BR Code_92,BR Code_93,BR Code_94,BR Code_96,BR Code_101,BR Code_107
0,0.688847,0.147698,-1.292758,-0.269246,-1.316478,-0.431814,-0.431836,-1.315719,0.281463,-0.993018,-1.393289,0.011192,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0.125437,0.018535,0.156738,-1.306328,-0.877919,-0.431832,-0.431724,0.863130,0.906803,0.519530,0.456123,0.744457,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,-0.708140,0.364699,1.086569,0.034200,0.434506,-0.431854,-0.431715,-1.435231,0.247663,-1.230780,-0.459635,0.432518,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,-0.008006,-1.304973,-0.364930,1.030771,0.162498,-0.431779,-0.431813,1.440412,-1.691231,2.029465,0.945142,-0.395991,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0.810206,0.577442,-0.489546,-1.000666,1.109476,-0.431845,-0.431719,-0.703085,1.143817,-0.919289,-1.078168,-1.133400,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16993,-0.426386,-1.136120,1.569991,-1.205898,0.051171,2.265041,2.208614,0.607148,-0.114939,1.314297,0.713888,1.057396,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
16994,1.134802,-0.496863,-0.889442,-0.898358,0.188230,-0.431734,-0.431785,-1.175450,0.195013,-0.407069,-1.608099,-0.785230,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
16995,-0.569594,-1.174945,-1.245847,-0.140366,-0.015182,-0.431777,-0.431740,-1.998612,-0.858995,-0.418993,-0.948138,-0.896591,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
16996,-0.850433,-1.543892,1.466190,0.848690,-0.142117,-0.431812,-0.431849,-0.852818,-1.765879,0.669810,0.049455,1.129613,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Final binary classifier, using the best parameters reported by the grid
# search (C=1.0, l2 penalty, lbfgs solver).
model = LogisticRegression(
    penalty='l2',
    C=1.0,
    solver='lbfgs',
    class_weight='balanced',
    max_iter=1000,
)
model.fit(X, y)

LogisticRegression(class_weight='balanced', max_iter=1000, solver='sag')

# Multiclass Classification

Using the parameters determined above to run multiclass classification

In [None]:
# Columns selected from the dataset for the multiclass task, in model order.
features = (
    # Customer identifier (dropped again before fitting).
    ['CUSTOMER_ID']
    # Categorical code columns.
    + ['Corp_Residence_Country_Code', 'BR Code']
    # Multiclass target.
    + ['Final_IG']
    # Predictors carrying the _Y0 suffix.
    + [
        'TOTALASSET_Y0',
        'TOTALREVENUE_Y0',
        'CURRENTLIABILITY_Y0',
        'TOTALEQUITY_Y0',
        'TOTALNETWORTH_Y0',
        'INVENTORYDAYS_Y0',
        'payableDAYS_Y0',
        'OPERPROFIT_Y0',
    ]
    # Remaining predictors.
    + ['SBTL', 'OPE', 'OROA', 'SHORTTERM_LEVERAGE']
)

In [None]:
# Restrict the full dataset to the columns listed in `features` for the
# multiclass task.
data_multi = data_short.loc[:, features]
data_multi

Unnamed: 0,CUSTOMER_ID,Corp_Residence_Country_Code,BR Code,Final_IG,TOTALASSET_Y0,TOTALREVENUE_Y0,CURRENTLIABILITY_Y0,TOTALEQUITY_Y0,TOTALNETWORTH_Y0,INVENTORYDAYS_Y0,payableDAYS_Y0,OPERPROFIT_Y0,SBTL,OPE,OROA,SHORTTERM_LEVERAGE
0,1576,1,5,5,619611.510181,611355.411490,578103.988392,596259.219747,578045.252633,5.904728e+05,5.829171e+05,575870.723892,1.025318,0.941957,0.929406,1.000102
1,648,0,12,8,608391.483917,608591.366569,603655.551037,577735.145730,585900.805747,5.837973e+05,6.235412e+05,613853.875298,1.053409,1.008647,1.008978,1.030303
2,13395,0,19,5,591791.219692,615999.168162,620046.519867,601679.290137,609409.181664,5.756264e+05,6.270374e+05,573787.307640,1.023800,0.931474,0.969577,1.017455
3,13610,0,21,6,605734.046655,580268.738890,594459.641503,619479.765547,604536.927056,6.029364e+05,5.912152e+05,623917.451440,0.936703,1.075222,1.030019,0.983331
4,11392,1,5,8,622028.317955,620551.783461,592262.918727,583194.794576,621499.359273,5.791048e+05,6.255971e+05,586550.570176,1.064056,0.945208,0.942964,0.952958
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16993,9483,1,5,8,597402.203485,583882.137797,628568.237529,579529.000652,602542.832781,9.827803e+08,9.620414e+08,609391.425458,1.007512,1.043689,1.020069,1.043193
16994,3969,0,32,8,628492.484307,597562.025539,585213.601861,585022.194173,604997.849904,6.193316e+05,6.015298e+05,578315.986698,1.021435,0.967792,0.920164,0.967299
16995,1190,0,4,12,594550.303034,583051.296070,578930.931594,598561.236938,601354.290649,6.038162e+05,6.180281e+05,563966.077578,0.974088,0.967267,0.948559,0.962712
16996,11429,1,5,3,588957.530051,575155.953282,626738.433905,616227.480258,599080.625400,5.910354e+05,5.782919e+05,583940.329983,0.933350,1.015273,0.991481,1.046167


## Gridsearch for multiclass

In [None]:
# Split the multiclass data 50/50 with a fixed seed; the data_grid half feeds
# the hyperparameter search.
data_grid, data_rest = train_test_split(
    data_multi,
    test_size=0.5,
    random_state=42,
)

In [None]:
# Predictors for the multiclass search: everything except the customer
# identifier and the Final_IG target.
X = data_grid.drop(columns=['CUSTOMER_ID', 'Final_IG'])
# Target kept as a one-column DataFrame.
# NOTE(review): scikit-learn estimators expect a 1-D target; confirm whether
# the custom scorer used in the search relies on the DataFrame shape before
# switching to data_grid['Final_IG'].
y = data_grid.loc[:, ['Final_IG']]

In [None]:
# Standardise the numeric predictors and one-hot encode the categorical codes.
# After the identifier and target are dropped, the first two columns of X are
# Corp_Residence_Country_Code and BR Code; the remaining columns are numeric.
# NOTE(review): the scaler is fit on every row before cross-validation, so the
# validation folds influence the scaling; a Pipeline would avoid that.
columns = X.columns
scaler = StandardScaler()
X[columns[2:]] = scaler.fit_transform(X[columns[2:]])
# Name the columns to encode explicitly: without `columns=`, get_dummies only
# converts object/string/category columns, so integer-typed codes would be
# silently left as ordinal numbers.
X = pd.get_dummies(X, columns=list(columns[:2]))
X

Unnamed: 0,TOTALASSET_Y0,TOTALREVENUE_Y0,CURRENTLIABILITY_Y0,TOTALEQUITY_Y0,TOTALNETWORTH_Y0,INVENTORYDAYS_Y0,payableDAYS_Y0,OPERPROFIT_Y0,SBTL,OPE,OROA,SHORTTERM_LEVERAGE,Corp_Residence_Country_Code_0,Corp_Residence_Country_Code_1,BR Code_0,BR Code_1,BR Code_2,BR Code_3,BR Code_4,BR Code_5,BR Code_6,BR Code_7,BR Code_8,BR Code_9,BR Code_10,BR Code_11,BR Code_12,BR Code_13,BR Code_14,BR Code_15,BR Code_16,BR Code_18,BR Code_19,BR Code_20,BR Code_21,BR Code_22,BR Code_23,BR Code_24,BR Code_25,BR Code_26,...,BR Code_48,BR Code_49,BR Code_50,BR Code_51,BR Code_52,BR Code_53,BR Code_54,BR Code_55,BR Code_56,BR Code_57,BR Code_58,BR Code_59,BR Code_60,BR Code_62,BR Code_63,BR Code_64,BR Code_66,BR Code_67,BR Code_68,BR Code_69,BR Code_71,BR Code_72,BR Code_74,BR Code_75,BR Code_76,BR Code_77,BR Code_78,BR Code_80,BR Code_81,BR Code_82,BR Code_83,BR Code_87,BR Code_90,BR Code_91,BR Code_92,BR Code_93,BR Code_94,BR Code_96,BR Code_101,BR Code_107
4281,-0.189184,0.888441,0.448487,2.639130,-0.333064,-0.429263,-0.429319,1.479175,-1.017470,0.234056,1.123343,0.541193,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2546,0.607340,0.021667,0.721777,0.972673,1.511577,-0.429289,-0.429296,-1.407548,-0.638477,-0.959333,-1.406698,-0.575867,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12519,3.030980,-0.209422,0.985212,-0.762920,-0.214756,-0.429281,-0.429283,-1.152433,0.338509,-0.617539,-2.825936,0.834814,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6078,-0.196111,-0.314982,1.267437,-0.097376,1.892250,-0.429392,-0.429328,1.427739,-0.201490,1.162710,1.094027,-0.464900,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8382,1.910833,-0.013224,1.113340,0.310088,-0.253241,-0.429309,-0.429305,-1.343510,-0.235755,-0.891455,-2.242821,0.954925,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14392,-0.684534,1.073275,1.184994,0.042449,-1.082153,2.198285,2.389856,-1.425097,0.796900,-1.729802,-0.474079,1.649729,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5781,0.730631,-1.301289,-1.328214,0.379140,-1.343801,-0.429277,-0.429402,1.565919,-1.284092,2.107912,0.463647,0.010789,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
924,2.615355,2.654289,1.301012,2.035512,-0.997945,-0.429381,-0.429338,-0.918999,0.659285,-2.473257,-2.425370,1.667268,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
16943,0.671132,-0.718367,-0.298508,-1.359586,0.549729,-0.429407,-0.429424,-1.222222,0.346294,-0.266749,-1.329145,-0.618341,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Candidate hyperparameter values for the multiclass grid search.
# NOTE(review): not every solver/penalty pair is supported by scikit-learn
# (e.g. 'lbfgs' with 'l1'); the search cell sets error_score=0, so such fits
# score 0 instead of aborting the search.
c_values = [100, 10, 1.0, 0.1, 0.01]
penalty = ['none', 'l1', 'l2', 'elasticnet']
solvers = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']

# Base estimator; class_weight='balanced' reweights classes by frequency.
model = LogisticRegression(class_weight='balanced', max_iter=10000)

In [None]:
# Exhaustive search over solver x penalty x C for the multiclass target.
grid = {'solver': solvers, 'penalty': penalty, 'C': c_values}

# 10 stratified folds, a single repeat, fixed seed for reproducibility.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=1, random_state=42)

grid_search = GridSearchCV(
    estimator=model,
    param_grid=grid,
    scoring=my_scorer,   # custom scorer defined outside this cell
    cv=cv,
    n_jobs=-1,           # use all available cores
    error_score=0,       # a failing fit scores 0 instead of raising
    verbose=True,
)
grid_result = grid_search.fit(X, y)

Fitting 10 folds for each of 100 candidates, totalling 1000 fits




In [None]:
# Mean cross-validated score of the best parameter combination, as measured
# by the scorer passed to the multiclass grid search.
grid_result.best_score_

In [None]:
# Parameter combination that achieved the best cross-validated score.
grid_result.best_params_