# Bank Stock Value Prediction
## **Data Understanding**

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_rows', 500) # your numbers here
pd.set_option('display.max_columns', 500)

In [None]:
sns.set(rc={'figure.figsize': (18,6)})

In [2]:
car_df = pd.read_csv('Data/car_v3.csv')

### **What does the data look like?**
    We have balance sheet features expressed in dollars
    We have income statement features expressed id dollars
    We have performance-ratio features
    We have segment banks based on asset size:  small, medium, and large

In [5]:
car_df.shape

(5542, 123)

### **Segment features into balance/income statement versus performance ratios**
    1.Create segment based on balance sheet and income statement features
    2.Create segment based on performance ratio features

In [3]:
#Create balance sheet and income statement dataframe
bal_inc_df = car_df.iloc[0:,0:]
#Drop performance ratios features but keep last row (bank size) and other rows.
bal_inc_df.drop(bal_inc_df.iloc[:,65:98], inplace=True, axis=1)
bal_inc_df.drop(columns=['Cert', 'address', 'City', 'State', 'Zip', 'Report Date', 'mutual','% Insured'], inplace=True)
bal_inc_df = bal_inc_df.iloc[0:,1:]

In [4]:
bal_inc_df.head()

Unnamed: 0,Fed RSSD,Bank Name,offdom,Number Employees,Total Assets,Cash & Bal Due DI,Cash Balance IB,Total Securities,FedFunds Sold Rev Purch,Net Loans Leases,Loan Allowance Loss,Trading Acct Assets,Bank Premises Fixed Assets,Other RE Owned,Goodwill Intangibles,All Other Assets,Total Liab Equity,Total Liabilities,Total Deposits,Interest-bearing Deposits,Deposits Domestic,Feds Funds Purchased,Trading Liabilities,Other Borrowed Funds,Subordinated Debt,All Other Liabilities,Total Equity,Bank Equity,Perpertual Pref Stock,Common Stock,Surplus,Undivided Profits,Equity Minor Interest,Noncurrent Loans Leases,Noncurrent Loans Leases Guaranteed,Income Earned,Earning Assets,Long-term Assets,Average Total Assets,Average Assets Quart,Total Risk Weighted Assets,Adjusted Average Assets,Life Insurance Assets,General Acct Life Insurance,Separate Acct Life Insurance,Hybrid Life Insurance,Volatile Liabilities,Insider Loans,FHLB advances,Loans Leases held for sale,Unused Loan Commitments,Tier One Capital,Tier 2 Risk-based capital,Total Unused Commitments,Derivatives,Yield on Earning Assets,Total Interest Income,Total Interest Expense,Net Interest Income,Provision Loan Lease Losses,Total NonInterest Income,Gross Fid Act Income,Service Charges Deposit Accts,Additional NonInterest Income,Total NonInterest Expense,Salaries Employee Benefits,Premises Equipment Expense,Additional NonInterest Expense,Pre-tax Net Op Income,Securities gains loss,Income Tax,Income before ext items,Extraordinary Gains,Net Income,Minority Interest NI,Net income of bank and min int,Net Charge-offs,Cash Dividends,Sale Conversion Ret Stock,Net Operating Income,Bank Size
0,746223,1880 Bank,6,48.0,343830,26199,22763.0,35457,383,250939,1824.0,0,5248.0,649.0,2313.0,22642,343830,293711,288063,205200.0,288063,0,0,0,0.0,5648,50119.0,50119.0,0.0,13242.0,33764.0,3113.0,0.0,5335,0.0,690.0,309542.0,98947.0,349437.0,346860.5,247420.0,340697.0,14091.0,4244.0,0.0,9847.0,17400.0,5164.0,0.0,0.0,33579,45082.0,2082.0,33579,0.0,4.147104,6551.0,436.0,6115.0,74.0,925.0,0.0,374.0,551,4593.0,2054.0,569.0,1970,2373,0.0,574.0,1799.0,0.0,1799.0,0.0,1799.0,96.0,0.0,0.0,1799.0,Medium
1,3317192,1st Advantage Bank,1,18.0,100617,5372,4321.0,3197,0,85815,789.0,0,2024.0,3220.0,0.0,989,100617,90171,79002,62773.0,79002,0,0,11000,0.0,169,10446.0,10446.0,0.0,534.0,16797.0,-6885.0,0.0,27,0.0,209.0,93333.0,6923.0,101469.0,101429.5,86558.0,100970.0,0.0,0.0,0.0,0.0,11727.0,1836.0,11000.0,0.0,12494,10482.0,789.0,12494,0.0,4.588862,2164.0,467.0,1697.0,60.0,91.0,0.0,22.0,69,1514.0,897.0,124.0,493,214,0.0,0.0,214.0,0.0,214.0,0.0,214.0,-20.0,0.0,0.0,214.0,Medium
2,564856,1st Bank,1,9.0,49045,23525,20429.0,0,0,23816,899.0,0,946.0,0.0,0.0,758,49044,42387,42255,30940.0,42255,0,0,0,0.0,132,6657.0,6657.0,0.0,657.0,4443.0,1557.0,0.0,0,0.0,487.0,44245.0,92.0,50383.67,50062.5,26678.0,49961.0,0.0,0.0,0.0,0.0,551.0,0.0,0.0,0.0,2543,6657.0,340.0,2543,0.0,4.025356,925.0,53.0,872.0,0.0,76.0,0.0,36.0,40,513.0,322.0,57.0,134,435,0.0,0.0,435.0,0.0,435.0,0.0,435.0,-344.0,173.0,0.0,435.0,Small
3,419255,1st Bank & Trust,4,42.0,154512,8608,2035.0,44866,7155,87454,1143.0,0,5052.0,0.0,0.0,1377,154512,139371,139033,87477.0,139033,0,0,0,0.0,338,15141.0,15141.0,0.0,500.0,752.0,13889.0,0.0,77,0.0,906.0,141510.0,40465.0,151457.67,153360.5,79885.0,153019.0,0.0,0.0,0.0,0.0,2562.0,469.0,0.0,0.0,2325,15601.0,1000.0,2325,0.0,4.76083,3269.0,242.0,3027.0,60.0,654.0,0.0,308.0,346,1711.0,893.0,337.0,481,1910,13.0,0.0,1923.0,0.0,1923.0,0.0,1923.0,16.0,925.0,0.0,1910.0,Medium
4,350657,1st Bank in Hominy,1,15.0,40766,11502,9909.0,9609,0,18662,193.0,0,416.0,51.0,0.0,526,40766,37350,36472,29087.0,36472,814,0,0,0.0,64,3416.0,3416.0,0.0,300.0,300.0,2816.0,0.0,339,0.0,186.0,38180.0,7620.0,40671.33,40846.5,17934.0,40325.0,0.0,0.0,0.0,0.0,2901.0,92.0,0.0,0.0,1407,3509.0,193.0,1407,0.0,4.003664,772.0,82.0,690.0,30.0,128.0,0.0,87.0,41,640.0,340.0,61.0,239,148,0.0,0.0,148.0,0.0,148.0,0.0,148.0,21.0,56.0,0.0,148.0,Small


In [6]:
bal_inc_df.shape

(5542, 81)

In [7]:
#Create performance ratios dataframe
perf_rat_df = car_df.iloc[0:,0:]
#Drop balance sheet features.
perf_rat_df.drop(perf_rat_df.iloc[:,12:64], inplace=True, axis=1)
#Drop income statement features.
perf_rat_df.drop(perf_rat_df.iloc[:,46:70], inplace=True, axis=1)
#Drop other rows
perf_rat_df.drop(columns=['Cert', 'address', 'City', 'State', 'Zip', 'Report Date', 'mutual', 'offdom', 'Number Employees'], inplace=True)
perf_rat_df = perf_rat_df.iloc[:,1:38]

In [8]:
perf_rat_df.head()

Unnamed: 0.1,Unnamed: 0,Fed RSSD,Bank Name,Yield on Earning Assets,Cost of Funding Earnings Assets,Net Interest Margin,NonInterest Inc to Ave Asset,NonInterest Exp to Ave Asset,Loan Lease Loss Prov to Assets,Net Op Inc to Assets,Return On Assets,Pretax ROA,Return on Equity,RE to Ave Equity,Net Chargeoffs to Loans,Credit Loss Prov to Chargeoffs,Efficiency Ratio,Assets per Emp,Cash Div to Net Inc,Earning Assets to Total Asst Ratio,Loss Allow to Loans,Loan Loss Allow to noncurr Loans,Noncurr Assets Other RE,Noncurrent Loans to Loans,Net Loans Lease to Tot Assets,Net Loans Lease to Deposits,Net Loans Lease to core deposits,Tot Dom Deposits to Tot Assets,Equity to Assets,Core Capital Ratio,Tier 1-based risk capital,Total risk based Capt ratio,Common Equity tier 1,Average Total Assets.1,Average earning assets,Average equity,Average Total Loans,Bank Size
0,0,746223,1880 Bank,4.147104,0.276009,3.871094,0.529423,2.6288,0.042354,1.029656,1.029656,1.358185,7.269763,7.269763,0.076867,77.083333,63.309659,7.163125,0.0,90.02763,0.721625,34.189316,1.740395,2.110673,72.983451,87.112541,92.712709,83.780647,14.57668,13.232286,18.220839,19.062323,18.220839,349437.0,315931.33,49492.67,249781.67,Medium
1,1,3317192,1st Advantage Bank,4.588862,0.990295,3.598567,0.179365,2.984163,0.118263,0.421804,0.421804,0.421804,4.134866,4.134866,-0.045891,-300.0,84.675615,5.589833,0.0,92.760667,0.911043,2922.222222,3.227089,0.031176,85.288768,108.623832,123.886587,78.517547,10.381943,10.381301,12.109799,13.021327,12.109799,101469.0,94315.33,10351.0,87162.67,Medium
2,2,564856,1st Bank,4.025356,0.230642,3.794714,0.301685,2.036374,0.0,1.72675,1.72675,1.72675,13.293951,8.006931,-2.92488,0.0,54.113924,5.449444,39.770115,90.21307,3.637467,813.665418,0.0,0.0,48.559486,56.362561,57.107232,86.155571,13.573249,13.324393,24.953145,26.227603,24.953145,50383.67,45958.67,6544.33,23522.33,Small
3,3,419255,1st Bank & Trust,4.76083,0.352438,4.408392,0.863608,2.259377,0.07923,2.522157,2.539323,2.539323,26.001719,13.494392,0.035824,375.0,46.481934,3.678857,48.101924,91.58512,1.290111,1484.415584,0.049834,0.08691,56.600135,62.901613,64.082479,89.982008,9.799239,10.195466,19.529323,20.781123,19.529323,151457.67,137329.0,14791.33,89325.67,Medium
4,4,350657,1st Bank in Hominy,4.003664,0.42526,3.578405,0.629436,3.14718,0.147524,0.727785,0.727785,0.727785,8.720425,5.420805,0.224936,142.857143,78.239609,2.717733,37.837838,93.656478,1.023601,56.932153,0.95668,1.797932,45.778345,51.168019,54.273666,89.466712,8.379532,8.701798,19.566187,20.642355,19.566187,40671.33,38564.67,3394.33,18672.0,Small


In [9]:
perf_rat_df.shape

(5542, 38)

### **Segment bal_inc_df based on small, medium and large banks.  Each segment will be analyzed separately.**

In [12]:
bal_inc_small_df = bal_inc_df.loc[car_df['Bank Size'] == 'Small']
bal_inc_medium_df = bal_inc_df.loc[car_df['Bank Size'] == 'Medium']
bal_inc_large_df = bal_inc_df.loc[car_df['Bank Size'] == 'Large']

In [13]:
bal_inc_medium_df.shape

(4996, 81)

In [14]:
bal_inc_small_df.shape

(505, 81)

In [15]:
bal_inc_large_df.shape

(41, 81)

### **Segment bal_inc_df based on small, medium and large banks.  Each segment will be analyzed separately.**

In [16]:
perf_rat_small_df = perf_rat_df.loc[car_df['Bank Size'] == 'Small']
perf_rat_medium_df = perf_rat_df.loc[car_df['Bank Size'] == 'Medium']
perf_rat_large_df = perf_rat_df.loc[car_df['Bank Size'] == 'Large']

In [17]:
bal_inc_medium_df.shape

(4996, 81)

In [18]:
bal_inc_small_df.shape

(505, 81)

In [19]:
bal_inc_large_df.shape

(41, 81)

### **Analyze Balance Sheet & Income Statement Feature Segment.**

In [20]:
perf_rat_medium_df.describe()

Unnamed: 0,Fed RSSD,Yield on Earning Assets,Cost of Funding Earnings Assets,Net Interest Margin,NonInterest Inc to Ave Asset,NonInterest Exp to Ave Asset,Loan Lease Loss Prov to Assets,Net Op Inc to Assets,Return On Assets,Pretax ROA,Return on Equity,RE to Ave Equity,Net Chargeoffs to Loans,Credit Loss Prov to Chargeoffs,Efficiency Ratio,Assets per Emp,Cash Div to Net Inc,Earning Assets to Total Asst Ratio,Loss Allow to Loans,Loan Loss Allow to noncurr Loans,Noncurr Assets Other RE,Noncurrent Loans to Loans,Net Loans Lease to Tot Assets,Net Loans Lease to Deposits,Net Loans Lease to core deposits,Tot Dom Deposits to Tot Assets,Equity to Assets,Core Capital Ratio,Tier 1-based risk capital,Total risk based Capt ratio,Common Equity tier 1,Average Total Assets.1,Average earning assets,Average equity,Average Total Loans
count,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0
mean,982535.8,4.374796,0.575063,3.799733,1.180668,3.15936,0.122791,1.222591,1.220883,1.426075,10.144929,5.543414,0.108056,47.607257,67.73589,6.236468,39.440274,92.972587,1.29563,1089.849551,0.856527,0.986335,66.271336,80.569061,326.101988,83.099371,11.751049,11.731835,25.292191,26.358196,25.264536,966913.3,891834.5,114587.2,666556.7
std,1035319.0,1.021336,0.306493,0.958571,10.035937,5.514374,0.411783,4.975244,4.968852,6.252945,13.035112,6.829558,0.580769,4198.377234,23.672157,24.078027,191.103918,3.808019,0.737136,5993.058672,1.37583,1.527478,16.135775,32.693101,13358.157183,7.989412,5.525288,5.489716,500.239606,500.222953,500.240249,3334449.0,3060815.0,419297.8,2275357.0
min,37.0,0.004849,0.0,-1.691405,-5.311467,0.0,-4.584085,-13.005543,-13.005543,-13.000572,-71.74556,-89.020316,-4.284245,-117500.0,-280.638144,0.095114,-3846.153846,2.308442,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.040228,1.858165,2.11029,2.545715,3.212663,2.545715,37142.33,1566.67,3026.33,0.33
25%,319913.2,3.905537,0.349942,3.353701,0.315592,2.2919,0.0,0.745784,0.747277,0.879003,6.596481,2.992371,-0.00551,0.0,58.212708,3.902945,0.0,91.574836,0.950269,95.078563,0.179379,0.185584,57.739972,68.04929,72.156475,80.462836,9.407191,9.453846,12.406994,13.459011,12.375742,123767.4,115220.1,13991.25,76935.25
50%,643396.0,4.316201,0.540268,3.759286,0.530685,2.762022,0.061533,1.075435,1.077428,1.26009,9.813373,5.774089,0.013986,47.607257,66.749986,4.788256,27.472527,93.310483,1.194493,206.621489,0.49973,0.553368,69.673724,82.990164,89.167159,84.701626,10.79489,10.678721,14.89968,16.010109,14.882586,242757.2,226236.2,26763.17,162305.0
75%,949191.8,4.755131,0.758566,4.159463,0.83868,3.250694,0.142949,1.421429,1.424451,1.646214,13.147539,8.692979,0.098518,200.0,75.301766,6.190051,58.63508,94.993853,1.487665,655.106058,1.058548,1.215154,78.018209,94.315599,105.225374,87.924165,12.603703,12.411036,19.036933,20.117917,19.035501,546010.2,511311.5,61751.5,391362.5
max,5227101.0,25.299932,3.470988,23.949882,518.707022,247.826298,11.230681,331.488091,331.488091,419.605254,779.934535,50.877685,18.545028,80000.0,1095.454545,1572.0266,10000.0,99.788768,14.98674,140700.0,32.877823,31.757129,98.857443,1818.389524,930026.0,97.154347,97.902759,100.897155,35347.4026,35347.4026,35347.4026,49130180.0,47932820.0,9149333.0,36330660.0


In [21]:
bal_inc_medium_df.describe()

Unnamed: 0,Fed RSSD,offdom,Number Employees,Total Assets,Cash & Bal Due DI,Cash Balance IB,Total Securities,FedFunds Sold Rev Purch,Net Loans Leases,Loan Allowance Loss,Trading Acct Assets,Bank Premises Fixed Assets,Other RE Owned,Goodwill Intangibles,All Other Assets,Total Liab Equity,Total Liabilities,Total Deposits,Interest-bearing Deposits,Deposits Domestic,Feds Funds Purchased,Trading Liabilities,Other Borrowed Funds,Subordinated Debt,All Other Liabilities,Total Equity,Bank Equity,Perpertual Pref Stock,Common Stock,Surplus,Undivided Profits,Equity Minor Interest,Noncurrent Loans Leases,Noncurrent Loans Leases Guaranteed,Income Earned,Earning Assets,Long-term Assets,Average Total Assets,Average Assets Quart,Total Risk Weighted Assets,Adjusted Average Assets,Life Insurance Assets,General Acct Life Insurance,Separate Acct Life Insurance,Hybrid Life Insurance,Volatile Liabilities,Insider Loans,FHLB advances,Loans Leases held for sale,Unused Loan Commitments,Tier One Capital,Tier 2 Risk-based capital,Total Unused Commitments,Derivatives,Yield on Earning Assets,Total Interest Income,Total Interest Expense,Net Interest Income,Provision Loan Lease Losses,Total NonInterest Income,Gross Fid Act Income,Service Charges Deposit Accts,Additional NonInterest Income,Total NonInterest Expense,Salaries Employee Benefits,Premises Equipment Expense,Additional NonInterest Expense,Pre-tax Net Op Income,Securities gains loss,Income Tax,Income before ext items,Extraordinary Gains,Net Income,Minority Interest NI,Net income of bank and min int,Net Charge-offs,Cash Dividends,Sale Conversion Ret Stock,Net Operating Income
count,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0,4996.0
mean,982535.8,10.21217,151.305845,983310.4,56557.98,42894.59,183047.1,8043.698,671245.8,7581.197,1436.051,11727.26,1084.110689,18707.92,31460.49,983310.4,866346.4,781160.0,599834.2,779202.9,11090.6,564.099279,63049.9,931.2494,9550.533,116964.1,116858.1,372.389512,5032.04,63199.15,48254.54,105.964371,6672.925,1541.477,3634.993,906334.4,309781.2,966913.3,974764.0,720471.9,955029.8,11250.466974,7947.961569,1626.442954,1676.06245,92085.97,5350.045436,56937.4,9469.503,255678.8,101777.2,7472.479584,255680.4,115092.9,4.374796,19617.62,2877.952962,16739.67,994.683147,5189.083867,789.454564,874.151521,3471.856685,13210.888911,6873.271217,1419.396117,4918.221577,7723.179944,22.490993,1540.07526,6205.595677,-38.428543,6162.171938,4.995196,6167.167134,823.198959,2730.068054,190.831665,6187.188523
std,1035319.0,27.758616,425.168535,3385014.0,334514.9,315678.9,919583.6,256950.5,2295451.0,32434.45,43873.34,36964.22,4894.063984,132427.3,141976.1,3385014.0,2978430.0,2628517.0,2004826.0,2618586.0,67611.67,12923.501888,389158.0,15133.894448,50255.94,426163.4,425538.9,11022.065803,60945.41,306867.6,175426.0,3232.398173,63047.04,52391.09,19608.64,3105365.0,1180424.0,3334449.0,3357693.0,2481528.0,3257409.0,42784.450628,28752.854037,16167.330488,10443.820496,390699.9,16771.849454,363048.0,130622.9,2492876.0,341316.6,30806.690301,2492875.0,1243568.0,1.021336,72117.61,12260.16735,61531.23,12010.996918,24655.290419,9640.013721,4432.949386,18370.534838,44802.676161,21529.899872,4648.397843,23936.004004,29357.124644,574.655776,6971.310458,22926.511189,2354.985225,22966.514736,120.394653,22981.851768,11828.072058,15640.854037,3803.45089,22867.88559
min,37.0,0.0,0.0,50004.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,155.0,50004.0,1183.0,500.0,0.0,500.0,0.0,0.0,0.0,0.0,4.0,2634.0,2634.0,0.0,0.0,0.0,-971905.0,-2165.0,0.0,0.0,0.0,1563.0,0.0,37142.33,40229.0,154.0,39633.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2633.0,0.0,0.0,0.0,0.004849,2.0,0.0,-211.0,-40479.0,-11652.0,0.0,0.0,-11652.0,0.0,0.0,-3322.0,-270.0,-10369.0,-12066.0,-12000.0,-11532.0,-164043.0,-151163.0,-1318.0,-151163.0,-26994.0,0.0,-92160.0,-11514.62
25%,319913.2,2.0,27.0,124466.8,7218.25,3320.75,16892.0,0.0,76405.5,932.75,0.0,1363.75,0.0,0.0,2518.75,124467.0,109009.0,103694.2,80223.0,103665.5,0.0,0.0,0.0,0.0,424.0,14155.0,14155.0,0.0,125.0,2100.0,5826.25,0.0,264.0,0.0,559.0,116033.5,27575.75,123767.4,124119.6,81670.5,123437.8,0.0,0.0,0.0,0.0,5614.5,350.0,0.0,0.0,8933.25,14299.75,841.0,8933.25,0.0,3.905537,2470.0,259.0,2176.0,0.0,251.75,0.0,69.0,137.0,1723.75,957.75,178.0,544.0,643.75,0.0,0.0,565.0,0.0,564.0,0.0,564.75,-3.0,0.0,0.0,568.5
50%,643396.0,4.0,51.0,245114.0,14891.5,9472.0,41402.0,0.0,160371.0,2025.5,0.0,3588.5,72.0,13.5,6084.5,245114.0,217031.0,204567.0,158965.0,204567.0,0.0,0.0,3000.0,0.0,1228.5,26938.0,26893.5,0.0,500.0,7431.5,14715.0,0.0,1023.0,0.0,1073.0,227354.5,65065.5,242757.2,243775.8,169724.5,242323.5,2340.0,1782.0,0.0,0.0,15371.0,1483.0,2592.5,0.0,23064.0,27163.5,1806.0,23064.0,0.0,4.316201,4921.5,592.0,4295.0,63.0,645.0,0.0,172.0,389.0,3364.5,1915.0,361.5,1035.5,1472.5,0.0,128.0,1300.0,0.0,1300.0,0.0,1300.0,9.0,277.5,0.0,1294.91
75%,949191.8,8.0,112.0,553688.5,33598.5,23660.25,100879.8,1138.0,388179.2,4715.0,0.0,8757.0,595.25,1702.25,15679.5,553688.5,491401.5,465054.2,359662.5,465054.2,1335.25,0.0,18985.0,0.0,3601.0,62673.75,62673.75,0.0,1558.5,22084.5,35142.0,0.0,3022.25,0.0,2245.0,515390.0,174145.5,546010.2,549831.8,401210.8,546534.5,7295.25,5926.25,0.0,0.0,43403.75,4579.5,18000.0,375.0,69420.5,61790.5,4340.0,69519.0,300.0,4.755131,11153.75,1492.75,9526.0,271.25,1963.75,0.0,441.25,1275.0,7798.25,4479.25,901.25,2398.25,3691.25,0.0,565.25,3213.25,0.0,3202.75,0.0,3209.25,104.0,1008.0,0.0,3180.25
max,5227101.0,739.0,6674.0,49117600.0,15784000.0,15749000.0,45014060.0,17359000.0,36644060.0,1120828.0,2509963.0,1068099.0,168000.0,3111880.0,5594944.0,49117600.0,45501400.0,43727870.0,42702090.0,43727870.0,1666692.0,820407.0,13240540.0,624385.0,1169000.0,9279000.0,9279000.0,625000.0,3481683.0,5978818.0,6431000.0,200000.0,3549089.0,3471840.0,1161161.0,47738890.0,36627560.0,49130180.0,49275880.0,36280140.0,48878100.0,877943.0,801691.0,462977.0,397771.0,7840984.0,515719.0,13230600.0,4681416.0,94059570.0,9257000.0,950445.0,94059570.0,58295540.0,25.299932,1628847.0,264542.0,1467615.0,534243.0,456675.0,383194.0,133000.0,456672.0,839623.0,377595.0,81052.0,636557.0,413622.0,22543.0,115114.0,363938.0,2410.0,363938.0,6123.0,363938.0,554128.0,449000.0,201736.0,362419.12


In [None]:

#Pull specific rows and turn visualize as columns
car_df.ix[0]

In [None]:
#What are the available columns?
car_df.columns

In [None]:
#descriptive statistics summary of a given column dataset
car_df['Net Income'].describe()

In [None]:
car_df['Cash Dividends'].describe()

In [None]:
car_df['Total Assets'].describe()

Definition of Kurtosis https://en.wikipedia.org/wiki/Kurtosis

In [None]:
print("Skewness: %f" % car_df['Cash Dividends'].skew())
print("Kurtosis: %f" % car_df['Cash Dividends'].kurt())

In [None]:
print("Skewness: %f" % car_df['Total Assets'].skew())
print("Kurtosis: %f" % car_df['Total Assets'].kurt())

In [None]:
scatter1 = pd.concat([car_df['Total Assets'], car_df['Cash Dividends']], axis=1)
scatter1.plot.scatter(x='Total Assets', y='Cash Dividends')

In [None]:
scatter1 = pd.concat([car_df['Total Assets'], car_df['Bank Equity']], axis=1)
scatter1.plot.scatter(x='Total Assets', y='Bank Equity')

In [None]:
car_df.plot.scatter(x='Total Assets', y='Net Income');

In [None]:
# Show correlations
car_df.corr()

In [None]:
# Correlation visualization
corrmat = car_df.corr()
f, ax = plt.subplots(figsize=(12, 12))
sns.heatmap(corrmat, vmax=.8, square=True);

Boxplot Total Assets

boxplot documentation: https://seaborn.pydata.org/generated/seaborn.boxplot.html

In [None]:
sns.set(style="whitegrid")
ax = sns.boxplot(x=car_df["Total Assets"])

Pairplot


In [None]:
#sns.pairplot(car_df)

In [None]:
#sns.heatmap(car_df.corr(),cmap='coolwarm',annot=True)

In [None]:
#sns.clustermap(car_df)

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
#standardizing data: Total Assets

total_assets_standardized = StandardScaler().fit_transform(car_df['Total Assets'][:,np.newaxis]);
low_range = total_assets_standardized[total_assets_standardized[:,0].argsort()][:20]
high_range= total_assets_standardized[total_assets_standardized[:,0].argsort()][-20:]
print('outer range (low) of the distribution:')
low_range

In [None]:
print('\nouter range (high) of the distribution:')
high_range

There are a few high variances as seen on above columns