In [1]:
import pandas as pd
import numpy as np

In [2]:
# read in features data, extracting only the 55 rows corresponding to unique SBAs
nineties = pd.read_csv('nineties_withtens.csv')
nineties = nineties.iloc[:55]
thousands = pd.read_csv('thousands_withtens.csv')
thousands = thousands.iloc[:55]
tens_w_nineties = pd.read_csv('tens_withnineties.csv')
tens_w_nineties = tens_w_nineties.iloc[:55]
tens_w_thousands = pd.read_csv('tens_withthousands.csv')
tens_w_thousands = tens_w_thousands.iloc[:55]

response_rates = pd.read_csv('participationrates2000_2010.csv')

# read in tract numbers as strings to search with later
tract_neighborhood = pd.read_csv('nyc2010census_tabulation_equiv.csv',converters={'tract_num': lambda x: str(x)})

sba_neighborhood = pd.read_csv('sba_to_neigh_codes.csv')

In [3]:
# Extract response rates by borough
manhattan_rates = response_rates[response_rates['geo_id'][:].str[0:5] == '36061'][:]
queens_rates = response_rates[response_rates['geo_id'][:].str[0:5] == '36081'][:]
brooklyn_rates = response_rates[response_rates['geo_id'][:].str[0:5] == '36047'][:]
bronx_rates = response_rates[response_rates['geo_id'][:].str[0:5] == '36005'][:]
staten_island_rates = response_rates[response_rates['geo_id'][:].str[0:5] == '36085'][:]

In [4]:
sba_response_rate_2000 = []
sba_response_rate_2010 = []

# match sba to neighborhood code 
for sba in thousands['sub-burough'][:]:
    sba = str(sba)
    print(sba)
    borough = sba[0]
    
    neigh_codes = sba_neighborhood[sba][:].dropna()
    print("Number of neighborhood codes in SBA:",len(neigh_codes))
    sba_rate_2000 = 0
    sba_rate_2010 = 0
    number_of_codes = 0

    # extract corresponding tract numbers
    for code in neigh_codes:

        match_bool = tract_neighborhood['neighborhood_code'][:].str.match(code)
        tract_nums = tract_neighborhood[match_bool]['tract_num']
        print("Neighborhood code:",code)
        print("Number of tracts in neighborhood code:",len(tract_nums))
        code_rate_2000 = 0
        code_rate_2010 = 0
        number_of_tracts = 0

        if sum(match_bool)==0:
            print("No matches for code",code)
            continue
        else:
            # extract 2010 response rates by unique geoid      
            if borough=='1': # bronx
                stem = '36005'

                for tract in tract_nums:

                    geoid = stem+tract
                    tract_rate_2000 = list(bronx_rates[bronx_rates['geo_id'][:] == geoid]['2000_rate'])
                    tract_rate_2010 = list(bronx_rates[bronx_rates['geo_id'][:] == geoid]['2010_rate'])

                    if len(tract_rate_2000) == 0:
                        print("For neighborhood code",code,", no tracts match geoid",geoid)
                        continue
                    else:
                        tract_rate_2000 = tract_rate_2000[0]
                        tract_rate_2010 = tract_rate_2010[0]

                    # handle erroneous response rates
                    if tract_rate_2000 > 1:
                        tract_rate_2000 = 1
                    
                    if tract_rate_2010 > 1:
                        tract_rate_2010 = 1

                    number_of_tracts += 1
                    code_rate_2000 += tract_rate_2000
                    code_rate_2010 += tract_rate_2010

            elif borough=='2': # brookyln
                stem = '36047'

                for tract in tract_nums:

                    geoid = stem+tract
                    tract_rate_2000 = list(brooklyn_rates[brooklyn_rates['geo_id'][:] == geoid]['2000_rate'])
                    tract_rate_2010 = list(brooklyn_rates[brooklyn_rates['geo_id'][:] == geoid]['2010_rate'])

                    if len(tract_rate_2000) == 0:
                        print("For neighborhood code",code,", no tracts match geoid",geoid)
                        continue
                    else:
                        tract_rate_2000 = tract_rate_2000[0]
                        tract_rate_2010 = tract_rate_2010[0]

                    # handle erroneous response rates
                    if tract_rate_2000 > 1:
                        tract_rate_2000 = 1
                    
                    if tract_rate_2010 > 1:
                        tract_rate_2010 = 1

                    number_of_tracts += 1
                    code_rate_2000 += tract_rate_2000
                    code_rate_2010 += tract_rate_2010
                    
            elif borough=='3': # manhattan
                stem = '36061'

                for tract in tract_nums:

                    geoid = stem+tract
                    tract_rate_2000 = list(manhattan_rates[manhattan_rates['geo_id'][:] == geoid]['2000_rate'])
                    tract_rate_2010 = list(manhattan_rates[manhattan_rates['geo_id'][:] == geoid]['2010_rate'])

                    if len(tract_rate_2000) == 0:
                        print("For neighborhood code",code,", no tracts match geoid",geoid)
                        continue
                    else:
                        tract_rate_2000 = tract_rate_2000[0]
                        tract_rate_2010 = tract_rate_2010[0]

                    # handle erroneous response rates
                    if tract_rate_2000 > 1:
                        tract_rate_2000 = 1
                    
                    if tract_rate_2010 > 1:
                        tract_rate_2010 = 1

                    number_of_tracts += 1
                    code_rate_2000 += tract_rate_2000
                    code_rate_2010 += tract_rate_2010
                    
            elif borough=='4': # queens
                stem = '36081'

                for tract in tract_nums:

                    geoid = stem+tract 
                    tract_rate_2000 = list(queens_rates[queens_rates['geo_id'][:] == geoid]['2000_rate'])
                    tract_rate_2010 = list(queens_rates[queens_rates['geo_id'][:] == geoid]['2010_rate'])

                    if len(tract_rate_2000) == 0:
                        print("For neighborhood code",code,", no tracts match geoid",geoid)
                        continue
                    else:
                        tract_rate_2000 = tract_rate_2000[0]
                        tract_rate_2010 = tract_rate_2010[0]

                    # handle erroneous response rates
                    if tract_rate_2000 > 1:
                        tract_rate_2000 = 1
                    
                    if tract_rate_2010 > 1:
                        tract_rate_2010 = 1

                    number_of_tracts += 1
                    code_rate_2000 += tract_rate_2000
                    code_rate_2010 += tract_rate_2010

            elif borough=='5': # staten island
                stem = '36085'

                for tract in tract_nums:

                    geoid = stem+tract
                    tract_rate_2000 = list(staten_island_rates[staten_island_rates['geo_id'][:] == geoid]['2000_rate'])
                    tract_rate_2010 = list(staten_island_rates[staten_island_rates['geo_id'][:] == geoid]['2010_rate'])

                    if len(tract_rate_2000) == 0:
                        print("For neighborhood code",code,", no tracts match geoid",geoid)
                        continue
                    else:
                        tract_rate_2000 = tract_rate_2000[0]
                        tract_rate_2010 = tract_rate_2010[0]

                    # handle erroneous response rates
                    if tract_rate_2000 > 1:
                        tract_rate_2000 = 1
                    
                    if tract_rate_2010 > 1:
                        tract_rate_2010 = 1

                    number_of_tracts += 1
                    code_rate_2000 += tract_rate_2000
                    code_rate_2010 += tract_rate_2010


            if number_of_tracts == 0:
                print("For SBA",sba,"and neighborhood code",code,
                      ", there are zero matched tracts, so cannot calculate code_rate.")
            else:
                number_of_codes += 1
                code_rate_2000 = code_rate_2000 / number_of_tracts
                code_rate_2010 = code_rate_2010 / number_of_tracts
                sba_rate_2000 += code_rate_2000
                sba_rate_2010 += code_rate_2010

    if number_of_codes == 0:
        print("No codes matched for SBA",sba)
        continue
    else:
        sba_rate_2000 = sba_rate_2000 / number_of_codes
        sba_rate_2010 = sba_rate_2010 / number_of_codes
        sba_response_rate_2000.append(sba_rate_2000)
        sba_response_rate_2010.append(sba_rate_2010)
        print("2000 SBA rate for SBA",sba,"is",sba_rate_2000)
        print("2010 SBA rate for SBA",sba,"is",sba_rate_2010)

# append response values to data to get datasets for modeling
import copy

dataset2000 = copy.deepcopy(nineties)
dataset2000['sba_response_rate_2000'] = sba_response_rate_2000
dataset2000 = dataset2000.set_index('sub-burough')

dataset2010 = copy.deepcopy(thousands)
dataset2010['sba_response_rate_2010'] = sba_response_rate_2010
dataset2010 = dataset2010.set_index('sub-burough')

201
Number of neighborhood codes in SBA: 4
Neighborhood code: BK76
Number of tracts in neighborhood code: 12
For neighborhood code BK76 , no tracts match geoid 36047056100
Neighborhood code: BK73
Number of tracts in neighborhood code: 13
Neighborhood code: BK90
Number of tracts in neighborhood code: 10
For neighborhood code BK90 , no tracts match geoid 36047044900
Neighborhood code: BK72
Number of tracts in neighborhood code: 7
2000 SBA rate for SBA 201 is 0.5378726828726829
2010 SBA rate for SBA 201 is 0.5828574203574204
202
Number of neighborhood codes in SBA: 4
Neighborhood code: BK69
Number of tracts in neighborhood code: 10
Neighborhood code: BK68
Number of tracts in neighborhood code: 10
For neighborhood code BK68 , no tracts match geoid 36047021100
Neighborhood code: BK38
Number of tracts in neighborhood code: 12
For neighborhood code BK38 , no tracts match geoid 36047001500
Neighborhood code: BK09
Number of tracts in neighborhood code: 6
For neighborhood code BK09 , no tracts m

For neighborhood code BK91 , no tracts match geoid 36047093200
Neighborhood code: BK96
Number of tracts in neighborhood code: 17
For neighborhood code BK96 , no tracts match geoid 36047094600
2000 SBA rate for SBA 217 is 0.428587962962963
2010 SBA rate for SBA 217 is 0.5070271164021164
218
Number of neighborhood codes in SBA: 2
Neighborhood code: BK50
Number of tracts in neighborhood code: 34
For neighborhood code BK50 , no tracts match geoid 36047096000
Neighborhood code: BK45
Number of tracts in neighborhood code: 19
For neighborhood code BK45 , no tracts match geoid 36047069601
For neighborhood code BK45 , no tracts match geoid 36047069602
2000 SBA rate for SBA 218 is 0.5457486631016043
2010 SBA rate for SBA 218 is 0.5524955436720143
301
Number of neighborhood codes in SBA: 3
Neighborhood code: MN23
Number of tracts in neighborhood code: 14
Neighborhood code: MN24
Number of tracts in neighborhood code: 10
For neighborhood code MN24 , no tracts match geoid 36061003700
Neighborhood co

For neighborhood code QN21 , no tracts match geoid 36081067100
Neighborhood code: QN20
Number of tracts in neighborhood code: 19
For neighborhood code QN20 , no tracts match geoid 36081061301
For neighborhood code QN20 , no tracts match geoid 36081061302
Neighborhood code: QN19
Number of tracts in neighborhood code: 13
2000 SBA rate for SBA 405 is 0.6732937405731523
2010 SBA rate for SBA 405 is 0.5698708521870287
406
Number of neighborhood codes in SBA: 2
Neighborhood code: QN17
Number of tracts in neighborhood code: 24
For neighborhood code QN17 , no tracts match geoid 36081071303
For neighborhood code QN17 , no tracts match geoid 36081071304
For neighborhood code QN17 , no tracts match geoid 36081071305
For neighborhood code QN17 , no tracts match geoid 36081071306
For neighborhood code QN17 , no tracts match geoid 36081072300
For neighborhood code QN17 , no tracts match geoid 36081074900
For neighborhood code QN17 , no tracts match geoid 36081075701
For neighborhood code QN17 , no t

For neighborhood code BX14 , no tracts match geoid 36005018301
For neighborhood code BX14 , no tracts match geoid 36005018302
Neighborhood code: BX63
Number of tracts in neighborhood code: 7
For neighborhood code BX63 , no tracts match geoid 36005006300
For neighborhood code BX63 , no tracts match geoid 36005020900
For neighborhood code BX63 , no tracts match geoid 36005022101
For neighborhood code BX63 , no tracts match geoid 36005022102
Neighborhood code: BX26
Number of tracts in neighborhood code: 7
2000 SBA rate for SBA 103 is 0.6498412698412698
2010 SBA rate for SBA 103 is 0.6984126984126985
104
Number of neighborhood codes in SBA: 3
Neighborhood code: BX36
Number of tracts in neighborhood code: 14
For neighborhood code BX36 , no tracts match geoid 36005005300
For neighborhood code BX36 , no tracts match geoid 36005020501
For neighborhood code BX36 , no tracts match geoid 36005020502
For neighborhood code BX36 , no tracts match geoid 36005021700
For neighborhood code BX36 , no tra

In [5]:
# Save responses rates by SBA for 2000 and 2010
# just_sbas_and_rates_2000 = dataset2000['sba_response_rate_2000']
# just_sbas_and_rates_2010 = dataset2010['sba_response_rate_2010']
# just_sbas_and_rates_2000.to_csv('SBA_Rates_2000.csv')
# just_sbas_and_rates_2010.to_csv('SBA_Rates_2010.csv')

In [6]:
# BIG NOTE: POTENTIALLY INACCURATE DUE TO ERRORS SEEN IN PRINTOUT ABOVE
# BIG NOTE: Fills in NaN vals with average value in that category in order to make the random forest
# Random Forest Model: Trained and tested on 2000 data

# Forming training and testing sets and replace NaN with mean
k = len(dataset2000.columns) - 1
tens_feats = tens_w_nineties.set_index('sub-burough').iloc[:,1:k].values
X = dataset2000.iloc[:,1:k].values
y = dataset2000.iloc[:,k].values

from sklearn.preprocessing import Imputer
imp = Imputer(missing_values=np.nan, strategy='mean')
imp = imp.fit(tens_feats) # fit imputer to tens_feats so that number of features will match up for prediction later
X = imp.transform(X)

from sklearn.model_selection import train_test_split

# test_size=0.2 -> Training with 4/5 of data to test on the remaining 1/5
# random_state=0 for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


# Feature Scaling: This is optional, we can compare scaling vs. not scaling and see what's better
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


# Train the random forest
# Set random_state for reproducibility
# Can change n_estimators to see best performance
from sklearn.ensemble import RandomForestRegressor

regressor = RandomForestRegressor(n_estimators=200, random_state=0)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)


# Evaluate model
from sklearn import metrics

print('2000 Census Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('2000 Census Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print('2000 Census Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

2000 Census Mean Absolute Error: 0.06328008304429798
2000 Census Mean Squared Error: 0.005936164684902409
2000 Census Root Mean Squared Error: 0.07704650988138534


In [7]:
# Feature importances (with indices) in 2000
feat_importances = enumerate(regressor.feature_importances_)

# Top n important features in 2000
n = 10 # defaults to top 10
k = len(dataset2000.columns) - 1
ranked_feat_importances = sorted(feat_importances, key=lambda x:x[1]) # ascending sort
for i in range(1,n+1):
    index, importance = ranked_feat_importances[-i]
    feat = list(dataset2000.iloc[:,1:k].columns)[index]
    print("Rank",i,"most important feature for 2000 data is",feat,", with importance value",importance)

Rank 1 most important feature for 2000 data is 1989.0_delta_2 , with importance value 0.06452560850812666
Rank 2 most important feature for 2000 data is _delta_1 , with importance value 0.05412723400984821
Rank 3 most important feature for 2000 data is 'chinese', 'viet'_delta_1 , with importance value 0.05179744354404514
Rank 4 most important feature for 2000 data is Excellent_delta_2 , with importance value 0.04248714494139699
Rank 5 most important feature for 2000 data is 2001_delta_3 , with importance value 0.026376446805979427
Rank 6 most important feature for 2000 data is 'white', 'black', 'chinese'_delta_2 , with importance value 0.026196197203403274
Rank 7 most important feature for 2000 data is 'Kor', 'asian-ind'_delta_1 , with importance value 0.02482607799632863
Rank 8 most important feature for 2000 data is pctnt_recent_1999_delta_1 , with importance value 0.02328365698220781
Rank 9 most important feature for 2000 data is black_delta_2 , with importance value 0.0216849215320

In [8]:
# TO DO
# Check slides 27, 28
# Response rates by SBA for the predictions from each model
# Check python documentation for ROC Curve and maybe also confusion matrix, maybe precision-recall

In [9]:
# 2020 predictions using model trained on 90s features
k = len(dataset2000.columns) - 1
indexed_feats = tens_w_nineties.set_index('sub-burough')
tens_feats = tens_w_nineties.set_index('sub-burough').iloc[:,1:k].values

# Deal with NaNs
tens_feats = imp.transform(tens_feats)

# Feature Scaling: This is optional, we can compare scaling vs. not scaling and see what's better
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
tens_feats = sc.fit_transform(tens_feats)

preds_2020 = list(regressor.predict(tens_feats))
pred_from_90s = enumerate(preds_2020)

# Bottom n SBAs by predicted response rate Census 2020
n = 10 # defaults to bottom 10
ranked_rates = sorted(pred_from_90s, key=lambda x:x[1], reverse=True) # descending sort

for i in range(1,n+1):
    index, pred_rate = ranked_rates[-i]
    sba = str(list(indexed_feats.index)[index])
    print("Rank",i,"lowest predicted response rate SBA based on 2000 data is SBA",sba,
          ", with predicted response rate",pred_rate)

# Save predicted responses rates by SBA for 2020 from model trained on 2000 data
indexed_feats['preds_2020'] = preds_2020
sbas_and_preds = indexed_feats['preds_2020']
sbas_and_preds.to_csv('Predicted_SBA_Rates_2000_model.csv')

Rank 1 lowest predicted response rate SBA based on 2000 data is SBA 409 , with predicted response rate 0.5802390833473324
Rank 2 lowest predicted response rate SBA based on 2000 data is SBA 501 , with predicted response rate 0.5809059084812196
Rank 3 lowest predicted response rate SBA based on 2000 data is SBA 214 , with predicted response rate 0.5826765176509963
Rank 4 lowest predicted response rate SBA based on 2000 data is SBA 410 , with predicted response rate 0.5831084634097502
Rank 5 lowest predicted response rate SBA based on 2000 data is SBA 412 , with predicted response rate 0.5844223323275799
Rank 6 lowest predicted response rate SBA based on 2000 data is SBA 109 , with predicted response rate 0.5844933818620975
Rank 7 lowest predicted response rate SBA based on 2000 data is SBA 218 , with predicted response rate 0.5846141157395395
Rank 8 lowest predicted response rate SBA based on 2000 data is SBA 107 , with predicted response rate 0.5850824785279647
Rank 9 lowest predicted 

In [10]:
# BIG NOTE: POTENTIALLY INACCURATE DUE TO ERRORS SEEN IN PRINTOUT ABOVE
# BIG NOTE: Fills in NaN vals with average value in that category in order to make the random forest
# Random Forest Model: Trained and tested on 2010 data

# Forming training and testing sets and replace NaN with mean
k = len(dataset2010.columns) - 1
tens_feats = tens_w_thousands.set_index('sub-burough').iloc[:,1:k].values
X = dataset2010.iloc[:,1:k].values
y = dataset2010.iloc[:,k].values

from sklearn.preprocessing import Imputer
imp = Imputer(missing_values=np.nan, strategy='mean')
imp = imp.fit(tens_feats) # fit imputer to tens_feats so that number of features will match up for prediction later
X = imp.transform(X)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


# Feature Scaling: This is optional, we can compare scaling vs. not scaling and see what's better
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


# Train the random forest
# Set random_state for reproducibility
# Can change n_estimators to see best performance
from sklearn.ensemble import RandomForestRegressor

regressor = RandomForestRegressor(n_estimators=200, random_state=0)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)


# Evaluate model
from sklearn import metrics

print('2010 Census Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('2010 Census Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print('2010 Census Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

2010 Census Mean Absolute Error: 0.05237622742251183
2010 Census Mean Squared Error: 0.0033534021425956333
2010 Census Root Mean Squared Error: 0.05790856709154211


In [11]:
# Feature importances (with indices) in 2010
feat_importances = enumerate(regressor.feature_importances_)
feat_indices = []

# Top n important features in 2010
n = 10 # default to top 10
k = len(dataset2010.columns) - 1
ranked_feat_importances = sorted(feat_importances, key=lambda x:x[1]) # ascending sort
for i in range(1,n+1):
    index, importance = ranked_feat_importances[-i]
    feat_indices.append(index)
    feat = list(dataset2010.iloc[:,1:k].columns)[index]
    print("Rank",i,"most important feature for 2010 data is",feat,", with importance value",importance)

Rank 1 most important feature for 2010 data is 2004_delta_1 , with importance value 0.16500833832550227
Rank 2 most important feature for 2010 data is 2005_delta_3 , with importance value 0.06655909259187784
Rank 3 most important feature for 2010 data is std_value_delta_1 , with importance value 0.05787849878364259
Rank 4 most important feature for 2010 data is 31_delta_2 , with importance value 0.054775337196144046
Rank 5 most important feature for 2010 data is 1_delta_1 , with importance value 0.035202472501095074
Rank 6 most important feature for 2010 data is 2003_delta_1 , with importance value 0.0328850879184618
Rank 7 most important feature for 2010 data is 1988.0_delta_1 , with importance value 0.02179831654943404
Rank 8 most important feature for 2010 data is mean_income_delta_1 , with importance value 0.020492770145899807
Rank 9 most important feature for 2010 data is pctnt_recent_1991_delta_2 , with importance value 0.018789313599816223
Rank 10 most important feature for 2010

In [12]:
# 2020 predictions using model trained with 00s features
k = len(dataset2010.columns) - 1
indexed_feats = tens_w_thousands.set_index('sub-burough')
tens_feats = tens_w_thousands.set_index('sub-burough').iloc[:,1:k].values

# Deal with NaNs
tens_feats = imp.transform(tens_feats)

# Feature Scaling: This is optional, we can compare scaling vs. not scaling and see what's better
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
tens_feats = sc.fit_transform(tens_feats)

preds_2020 = regressor.predict(tens_feats)
pred_from_00s = enumerate(preds_2020)

# Bottom n SBAs by predicted response rate Census 2020
n = 10 # defaults to bottom 10
ranked_rates = sorted(pred_from_00s, key=lambda x:x[1], reverse=True) # descending sort
for i in range(1,n+1):
    index, pred_rate = ranked_rates[-i]
    sba = str(list(indexed_feats.index)[index])
    print("Rank",i,"lowest response rate SBA for 2010 data is SBA",sba,", with predicted response rate",pred_rate)
    
# Save predicted responses rates by SBA for 2020 from model trained on 2010 data
indexed_feats['preds_2020'] = preds_2020
sbas_and_preds = indexed_feats['preds_2020']
sbas_and_preds.to_csv('Predicted_SBA_Rates_2010_model.csv')

Rank 1 lowest response rate SBA for 2010 data is SBA 308 , with predicted response rate 0.5888653160794859
Rank 2 lowest response rate SBA for 2010 data is SBA 410 , with predicted response rate 0.5910226130908496
Rank 3 lowest response rate SBA for 2010 data is SBA 305 , with predicted response rate 0.5920356163993876
Rank 4 lowest response rate SBA for 2010 data is SBA 302 , with predicted response rate 0.5935308904467711
Rank 5 lowest response rate SBA for 2010 data is SBA 110 , with predicted response rate 0.6014560621503904
Rank 6 lowest response rate SBA for 2010 data is SBA 403 , with predicted response rate 0.6021841737194458
Rank 7 lowest response rate SBA for 2010 data is SBA 202 , with predicted response rate 0.6025614305772403
Rank 8 lowest response rate SBA for 2010 data is SBA 413 , with predicted response rate 0.6035589340029346
Rank 9 lowest response rate SBA for 2010 data is SBA 203 , with predicted response rate 0.6058357715133263
Rank 10 lowest response rate SBA for 