In [1]:
from statsmodels.stats.outliers_influence import variance_inflation_factor
import pandas as pd
import math
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from statsmodels.tools.tools import add_constant
import matplotlib.pyplot as plt
import seaborn as sns
import seaborn as snsbb
from scipy.stats import t
from scipy.stats import norm
from linearmodels import PanelOLS
from linearmodels.panel import compare
import geopandas as gpd



In [2]:
pre_ts_analysis = pd.read_csv('Research_Firearm/pre_ts_analysis_clean.csv')



In [3]:
pre_ts_analysis.columns

Index(['Year', 'Precinct', 'Full Time Positions', 'Budget', 'Borough',
       'MURDER & NON NEGL. MANSLAUGHTER', 'RAPE', 'ROBBERY', 'FELONY ASSAULT',
       'BURGLARY', 'GRAND LARCENY', 'GRAND LARCENY OF MOTOR VEHICLE',
       'TOTAL SEVEN MAJOR FELONY OFFENSES', 'Population_Year'],
      dtype='object')

In [6]:
precinct_counts = pre_ts_analysis['Precinct'].value_counts()
print(precinct_counts.to_string())

Precinct
1      18
68     18
94     18
90     18
88     18
84     18
83     18
81     18
79     18
78     18
77     18
76     18
75     18
73     18
72     18
71     18
70     18
100    18
101    18
102    18
111    18
122    18
120    18
115    18
114    18
113    18
112    18
110    18
103    18
109    18
108    18
107    18
106    18
105    18
104    18
69     18
67     18
5      18
66     18
32     18
30     18
28     18
26     18
25     18
24     18
23     18
20     18
19     18
17     18
13     18
10     18
9      18
7      18
6      18
33     18
34     18
40     18
49     18
63     18
62     18
61     18
60     18
52     18
50     18
48     18
41     18
47     18
46     18
45     18
44     18
43     18
42     18
123    18


In [7]:
pre_ts_analysis

Unnamed: 0,Year,Precinct,Full Time Positions,Budget,Borough,MURDER & NON NEGL. MANSLAUGHTER,RAPE,ROBBERY,FELONY ASSAULT,BURGLARY,GRAND LARCENY,GRAND LARCENY OF MOTOR VEHICLE,TOTAL SEVEN MAJOR FELONY OFFENSES,Population_Year
0,2006,1,219,11001943,MANHATTAN SOUTH,1,4,119,94,255,1462,78,2013,59431
1,2006,5,240,12354423,MANHATTAN SOUTH,2,5,132,104,152,605,40,1040,53356
2,2006,6,237,10716126,MANHATTAN SOUTH,3,6,214,123,280,1283,70,1979,61259
3,2006,7,174,7786080,MANHATTAN SOUTH,4,7,176,105,127,362,83,864,55703
4,2006,9,234,10190005,MANHATTAN SOUTH,1,14,252,165,297,775,75,1579,76639
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1309,2023,114,252,20752787,QUEENS NORTH,8,40,279,509,224,920,430,2410,210231
1310,2023,115,289,16525730,QUEENS NORTH,4,27,365,481,136,863,337,2213,181382
1311,2023,120,399,29273345,STATEN ISLAND,12,19,163,470,142,414,156,1376,125104
1312,2023,122,249,19873478,STATEN ISLAND,2,8,50,169,100,474,109,912,146208


In [5]:
# Filtering the DataFrame for precincts 6, 76, and 83
filtered_df = pre_ts_analysis[pre_ts_analysis['Precinct'].isin([6, 76, 83])]

# Grouping by 'Precinct' and 'Year' and counting occurrences
precinct_year_counts = filtered_df.groupby(['Precinct', 'Year']).size().reset_index(name='Count')

# Display the DataFrame
print(precinct_year_counts)

    Precinct  Year  Count
0          6  2006      1
1          6  2007      1
2          6  2008      1
3          6  2009      1
4          6  2010      1
5          6  2011      1
6          6  2012      1
7          6  2013      1
8          6  2014      1
9          6  2015      1
10         6  2016      1
11         6  2017      1
12         6  2018      1
13         6  2019      1
14         6  2020      1
15         6  2021      1
16         6  2022      1
17         6  2023      1
18        76  2006      1
19        76  2007      1
20        76  2008      1
21        76  2009      1
22        76  2010      1
23        76  2011      1
24        76  2012      1
25        76  2013      1
26        76  2014      1
27        76  2015      1
28        76  2016      1
29        76  2017      1
30        76  2018      1
31        76  2019      1
32        76  2020      1
33        76  2021      1
34        76  2022      1
35        76  2023      1
36        83  2006      1
37        83

## Define Violent and Crime, also calculate the percentage based crime and budget allocation on precinct level

In [5]:
# Create 'Violent Crime' column
pre_ts_analysis['Violent Crime'] = pre_ts_analysis['FELONY ASSAULT'] + pre_ts_analysis['MURDER & NON NEGL. MANSLAUGHTER'] + pre_ts_analysis['RAPE'] + pre_ts_analysis['ROBBERY']
# Create 'Property Crime' column
pre_ts_analysis['Property Crime'] = pre_ts_analysis['GRAND LARCENY'] + pre_ts_analysis['GRAND LARCENY OF MOTOR VEHICLE'] + pre_ts_analysis['BURGLARY']

In [6]:
# Calculate yearly totals
yearly_totals = pre_ts_analysis.groupby('Year').sum().reset_index()
# List of metrics for which want to calculate yearly totals
metrics = ['Violent Crime', 'Property Crime', 'Full Time Positions']
# Rename columns to *_by_year
yearly_totals = yearly_totals.rename(columns={metric: f"{metric.lower()}_by_year" for metric in metrics})
# Merge yearly totals with merged_data
merged_data = pd.merge(pre_ts_analysis, yearly_totals[['Year'] + [f"{metric.lower()}_by_year" for metric in metrics]], 
                       on='Year', how='left')

# List of metrics for which to calculate yearly totals and per capita rates
metrics = ['Violent Crime', 'Property Crime','Full Time Positions']
# Loop through each metric
for metric in metrics:
    # Compute the per capita rate
    per_capita_col_name = f"{metric}_per_capita"
    merged_data[per_capita_col_name] = merged_data[metric] / merged_data['Population_Year']    
    # Compute the percentage-based rate
    pct_col_name = f"{metric}_pct"
    yearly_col_name = f"{metric.lower()}_by_year"
    merged_data[pct_col_name] = merged_data[metric] / merged_data[yearly_col_name]

In [7]:
pre_ts_analysis = merged_data

In [8]:
# Calculate yearly totals
yearly_totals = pre_ts_analysis.groupby('Year').sum().reset_index()
# List of metrics for which want to calculate yearly totals
metrics = ['MURDER & NON NEGL. MANSLAUGHTER', 'RAPE', 'ROBBERY', 'FELONY ASSAULT', 
    'BURGLARY', 'GRAND LARCENY', 'GRAND LARCENY OF MOTOR VEHICLE', 
    'TOTAL SEVEN MAJOR FELONY OFFENSES', 'Budget','Population_Year']
# Rename columns to *_by_year
yearly_totals = yearly_totals.rename(columns={metric: f"{metric.lower()}_by_year" for metric in metrics})
# Merge yearly totals with merged_data
merged_data = pd.merge(pre_ts_analysis, yearly_totals[['Year'] + [f"{metric.lower()}_by_year" for metric in metrics]], 
                       on='Year', how='left')

# List of metrics for which to calculate yearly totals and per capita rates
metrics = ['MURDER & NON NEGL. MANSLAUGHTER', 'RAPE', 'ROBBERY', 'FELONY ASSAULT', 
    'BURGLARY', 'GRAND LARCENY', 'GRAND LARCENY OF MOTOR VEHICLE', 
    'TOTAL SEVEN MAJOR FELONY OFFENSES', 'Budget','Population_Year']
# Loop through each metric
for metric in metrics:
    # Compute the per capita rate
    per_capita_col_name = f"{metric}_per_capita"
    merged_data[per_capita_col_name] = merged_data[metric] / merged_data['Population_Year']    
    # Compute the percentage-based rate
    pct_col_name = f"{metric}_pct"
    yearly_col_name = f"{metric.lower()}_by_year"
    merged_data[pct_col_name] = merged_data[metric] / merged_data[yearly_col_name]

In [9]:
pre_ts_analysis = merged_data

In [10]:
pre_ts_analysis['Population_Year_pct']

0       0.007676
1       0.006892
2       0.007912
3       0.007195
4       0.009899
          ...   
1308    0.023993
1309    0.020701
1310    0.014278
1311    0.016686
1312    0.011588
Name: Population_Year_pct, Length: 1313, dtype: float64

In [11]:
pre_ts_analysis.groupby('Year')['Population_Year_pct'].sum()

Year
2006    1.0
2007    1.0
2008    1.0
2009    1.0
2010    1.0
2011    1.0
2012    1.0
2013    1.0
2014    1.0
2015    1.0
2016    1.0
2017    1.0
2018    1.0
2019    1.0
2020    1.0
2021    1.0
2022    1.0
2023    1.0
Name: Population_Year_pct, dtype: float64

## Create lag 1 for meature previous one year for crime and budget variable

In [12]:
# Get all columns ending with '_pct' or '_per_capita'
cols_to_lag = [col for col in pre_ts_analysis.columns if col.endswith('_pct') or col.endswith('_per_capita')]

# Loop through the columns and create a lag-1 column for each
for col in cols_to_lag:
    lag_col_name = col + '_lag1'  # Name of the new lag column
    pre_ts_analysis[lag_col_name] = pre_ts_analysis.groupby('Precinct')[col].shift(1)

In [13]:
pre_ts_analysis.columns

Index(['Year', 'Precinct', 'Full Time Positions', 'Budget', 'Borough',
       'MURDER & NON NEGL. MANSLAUGHTER', 'RAPE', 'ROBBERY', 'FELONY ASSAULT',
       'BURGLARY', 'GRAND LARCENY', 'GRAND LARCENY OF MOTOR VEHICLE',
       'TOTAL SEVEN MAJOR FELONY OFFENSES', 'Population_Year', 'Violent Crime',
       'Property Crime', 'violent crime_by_year', 'property crime_by_year',
       'full time positions_by_year', 'Violent Crime_per_capita',
       'Violent Crime_pct', 'Property Crime_per_capita', 'Property Crime_pct',
       'Full Time Positions_per_capita', 'Full Time Positions_pct',
       'murder & non negl. manslaughter_by_year', 'rape_by_year',
       'robbery_by_year', 'felony assault_by_year', 'burglary_by_year',
       'grand larceny_by_year', 'grand larceny of motor vehicle_by_year',
       'total seven major felony offenses_by_year', 'budget_by_year',
       'population_year_by_year', 'MURDER & NON NEGL. MANSLAUGHTER_per_capita',
       'MURDER & NON NEGL. MANSLAUGHTER_pct', 'RA

# Paper Analysis

In [14]:
# Creating a lagged version of the Violent and Property Crime

# Defining the regression formula
regression_formula = "Budget_per_capita ~ Q('Violent Crime_per_capita_lag1') + Q('Property Crime_per_capita_lag1') + Budget_per_capita_lag1"

# Running the regression
model = smf.ols(formula=regression_formula, data=pre_ts_analysis).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:      Budget_per_capita   R-squared:                       0.941
Model:                            OLS   Adj. R-squared:                  0.941
Method:                 Least Squares   F-statistic:                     6542.
Date:                Mon, 20 May 2024   Prob (F-statistic):               0.00
Time:                        09:59:07   Log-Likelihood:                -5128.6
No. Observations:                1240   AIC:                         1.027e+04
Df Residuals:                    1236   BIC:                         1.029e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                          coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------------------
In

In [15]:
# Creating a lagged version of the Violent and Property Crime pct

# Defining the regression formula
regression_formula = "Budget_pct ~ Q('Violent Crime_pct_lag1') + Q('Property Crime_pct_lag1')"

# Running the regression
model = smf.ols(formula=regression_formula, data=pre_ts_analysis).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.590
Model:                            OLS   Adj. R-squared:                  0.589
Method:                 Least Squares   F-statistic:                     889.9
Date:                Mon, 20 May 2024   Prob (F-statistic):          3.44e-240
Time:                        09:59:07   Log-Likelihood:                 5869.7
No. Observations:                1240   AIC:                        -1.173e+04
Df Residuals:                    1237   BIC:                        -1.172e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Intercept       

## Paper findings: Vio+Property

In [16]:
# Creating a lagged version of the Violent and Property Crime pct

# Defining the regression formula
regression_formula = "Budget_pct ~ Q('Violent Crime_pct_lag1') + Q('Property Crime_pct_lag1') + Budget_pct_lag1"

# Running the regression
model = smf.ols(formula=regression_formula, data=pre_ts_analysis).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.908
Model:                            OLS   Adj. R-squared:                  0.907
Method:                 Least Squares   F-statistic:                     4045.
Date:                Mon, 20 May 2024   Prob (F-statistic):               0.00
Time:                        09:59:07   Log-Likelihood:                 6793.4
No. Observations:                1240   AIC:                        -1.358e+04
Df Residuals:                    1236   BIC:                        -1.356e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Intercept       

In [17]:
filtered_pre_ts_analysis = pre_ts_analysis[(pre_ts_analysis['Year'] >= 2007) & (pre_ts_analysis['Year'] <= 2023)]


In [18]:
# Reset display options to default
pd.reset_option('display.max_rows')
pd.reset_option('display.max_columns')

In [19]:
predictors_df_2 = filtered_pre_ts_analysis[['Violent Crime_pct_lag1','Property Crime_pct_lag1','Budget_pct_lag1']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                  Variable        VIF
0   Violent Crime_pct_lag1   6.895170
1  Property Crime_pct_lag1   8.626746
2          Budget_pct_lag1  14.148611


## Paper findings: Vio+Property S

In [20]:



# Assuming 'filtered_pre_ts_analysis' is your DataFrame with proper columns.
X = filtered_pre_ts_analysis[['Violent Crime_pct_lag1', 'Property Crime_pct_lag1', 'Budget_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']

# Standardize the features
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)
X_standardized = pd.DataFrame(X_standardized, columns=X.columns, index=X.index)  # Match the indices

# Standardize the dependent variable
y_mean = y.mean()
y_std = y.std()
y_standardized = (y - y_mean) / y_std
y_standardized.index = X.index  # Ensure the indices are the same

# Add a constant to the model (intercept)
X_standardized = sm.add_constant(X_standardized)  # This will now retain the column names and index

# Fit the OLS model with standardized variables
model_standardized = sm.OLS(y_standardized, X_standardized).fit()

# Fit the OLS regression model
print(model_standardized.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.908
Model:                            OLS   Adj. R-squared:                  0.907
Method:                 Least Squares   F-statistic:                     4045.
Date:                Mon, 20 May 2024   Prob (F-statistic):               0.00
Time:                        09:59:07   Log-Likelihood:                -282.62
No. Observations:                1240   AIC:                             573.2
Df Residuals:                    1236   BIC:                             593.7
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
const                   -9

In [21]:
# Assuming 'filtered_pre_ts_analysis' is your DataFrame with proper columns.
X = filtered_pre_ts_analysis[['Violent Crime_pct_lag1', 'Property Crime_pct_lag1', 'Budget_pct_lag1','Population_Year_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']

# Standardize the features
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)
X_standardized = pd.DataFrame(X_standardized, columns=X.columns, index=X.index)  # Match the indices

# Standardize the dependent variable
y_mean = y.mean()
y_std = y.std()
y_standardized = (y - y_mean) / y_std
y_standardized.index = X.index  # Ensure the indices are the same

# Add a constant to the model (intercept)
X_standardized = sm.add_constant(X_standardized)  # This will now retain the column names and index

# Fit the OLS model with standardized variables
model_standardized = sm.OLS(y_standardized, X_standardized).fit()

# Fit the OLS regression model
print(model_standardized.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.908
Model:                            OLS   Adj. R-squared:                  0.908
Method:                 Least Squares   F-statistic:                     3041.
Date:                Mon, 20 May 2024   Prob (F-statistic):               0.00
Time:                        09:59:07   Log-Likelihood:                -280.79
No. Observations:                1240   AIC:                             571.6
Df Residuals:                    1235   BIC:                             597.2
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
const                   

In [22]:
# Assuming 'filtered_pre_ts_analysis' is your DataFrame with proper columns.
X = filtered_pre_ts_analysis[['Violent Crime_pct_lag1','Population_Year_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']

# Standardize the features
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)
X_standardized = pd.DataFrame(X_standardized, columns=X.columns, index=X.index)  # Match the indices

# Standardize the dependent variable
y_mean = y.mean()
y_std = y.std()
y_standardized = (y - y_mean) / y_std
y_standardized.index = X.index  # Ensure the indices are the same

# Add a constant to the model (intercept)
X_standardized = sm.add_constant(X_standardized)  # This will now retain the column names and index

# Fit the OLS model with standardized variables
model_standardized = sm.OLS(y_standardized, X_standardized).fit()

# Fit the OLS regression model
print(model_standardized.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.594
Model:                            OLS   Adj. R-squared:                  0.593
Method:                 Least Squares   F-statistic:                     904.3
Date:                Mon, 20 May 2024   Prob (F-statistic):          9.42e-243
Time:                        09:59:07   Log-Likelihood:                -1200.3
No. Observations:                1240   AIC:                             2407.
Df Residuals:                    1237   BIC:                             2422.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
const                   

In [23]:
# Assuming 'filtered_pre_ts_analysis' is your DataFrame with proper columns.
X = filtered_pre_ts_analysis[[ 'Budget_pct_lag1','Population_Year_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']

# Standardize the features
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)
X_standardized = pd.DataFrame(X_standardized, columns=X.columns, index=X.index)  # Match the indices

# Standardize the dependent variable
y_mean = y.mean()
y_std = y.std()
y_standardized = (y - y_mean) / y_std
y_standardized.index = X.index  # Ensure the indices are the same

# Add a constant to the model (intercept)
X_standardized = sm.add_constant(X_standardized)  # This will now retain the column names and index

# Fit the OLS model with standardized variables
model_standardized = sm.OLS(y_standardized, X_standardized).fit()

# Fit the OLS regression model
print(model_standardized.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.904
Model:                            OLS   Adj. R-squared:                  0.904
Method:                 Least Squares   F-statistic:                     5828.
Date:                Mon, 20 May 2024   Prob (F-statistic):               0.00
Time:                        09:59:07   Log-Likelihood:                -305.69
No. Observations:                1240   AIC:                             617.4
Df Residuals:                    1237   BIC:                             632.7
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
const                   

In [24]:
# Assuming 'filtered_pre_ts_analysis' is your DataFrame with proper columns.
X = filtered_pre_ts_analysis[[ 'Population_Year_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']

# Standardize the features
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)
X_standardized = pd.DataFrame(X_standardized, columns=X.columns, index=X.index)  # Match the indices

# Standardize the dependent variable
y_mean = y.mean()
y_std = y.std()
y_standardized = (y - y_mean) / y_std
y_standardized.index = X.index  # Ensure the indices are the same

# Add a constant to the model (intercept)
X_standardized = sm.add_constant(X_standardized)  # This will now retain the column names and index

# Fit the OLS model with standardized variables
model_standardized = sm.OLS(y_standardized, X_standardized).fit()

# Fit the OLS regression model
print(model_standardized.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.138
Model:                            OLS   Adj. R-squared:                  0.138
Method:                 Least Squares   F-statistic:                     198.6
Date:                Mon, 20 May 2024   Prob (F-statistic):           6.08e-42
Time:                        09:59:07   Log-Likelihood:                -1666.7
No. Observations:                1240   AIC:                             3337.
Df Residuals:                    1238   BIC:                             3348.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
const                   

In [25]:
predictors_df_2 = filtered_pre_ts_analysis[['Violent Crime_pct_lag1', 'Property Crime_pct_lag1', 'Budget_pct_lag1','Population_Year_pct_lag1']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                   Variable        VIF
0    Violent Crime_pct_lag1   6.923662
1   Property Crime_pct_lag1  13.151147
2           Budget_pct_lag1  15.733048
3  Population_Year_pct_lag1  11.152316


In [26]:
predictors_df_2 = filtered_pre_ts_analysis[['Violent Crime_pct_lag1', 'Property Crime_pct_lag1', 'Population_Year_pct_lag1']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                   Variable        VIF
0    Violent Crime_pct_lag1   4.331113
1   Property Crime_pct_lag1  11.408721
2  Population_Year_pct_lag1  10.029193


In [27]:
predictors_df_2 = filtered_pre_ts_analysis[['Violent Crime_pct_lag1',  'Population_Year_pct_lag1']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                   Variable       VIF
0    Violent Crime_pct_lag1  3.692949
1  Population_Year_pct_lag1  3.692949


In [28]:
predictors_df_2 = filtered_pre_ts_analysis[['Budget_pct_lag1',  'Population_Year_pct_lag1']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                   Variable       VIF
0           Budget_pct_lag1  7.306203
1  Population_Year_pct_lag1  7.306203


In [29]:
# Assuming 'filtered_pre_ts_analysis' is your DataFrame with proper columns.
X = filtered_pre_ts_analysis[['Violent Crime_pct_lag1', 'Property Crime_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']

# Standardize the features
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)
X_standardized = pd.DataFrame(X_standardized, columns=X.columns, index=X.index)  # Match the indices

# Standardize the dependent variable
y_mean = y.mean()
y_std = y.std()
y_standardized = (y - y_mean) / y_std
y_standardized.index = X.index  # Ensure the indices are the same

# Add a constant to the model (intercept)
X_standardized = sm.add_constant(X_standardized)  # This will now retain the column names and index

# Fit the OLS model with standardized variables
model_standardized = sm.OLS(y_standardized, X_standardized).fit()

# Fit the OLS regression model
print(model_standardized.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.590
Model:                            OLS   Adj. R-squared:                  0.589
Method:                 Least Squares   F-statistic:                     889.9
Date:                Mon, 20 May 2024   Prob (F-statistic):          3.44e-240
Time:                        09:59:08   Log-Likelihood:                -1206.3
No. Observations:                1240   AIC:                             2419.
Df Residuals:                    1237   BIC:                             2434.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
const                   -9

In [30]:
# Assuming 'filtered_pre_ts_analysis' is your DataFrame with proper columns.
X = filtered_pre_ts_analysis[['Budget_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']

# Standardize the features
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)
X_standardized = pd.DataFrame(X_standardized, columns=X.columns, index=X.index)  # Match the indices

# Standardize the dependent variable
y_mean = y.mean()
y_std = y.std()
y_standardized = (y - y_mean) / y_std
y_standardized.index = X.index  # Ensure the indices are the same

# Add a constant to the model (intercept)
X_standardized = sm.add_constant(X_standardized)  # This will now retain the column names and index

# Fit the OLS model with standardized variables
model_standardized = sm.OLS(y_standardized, X_standardized).fit()

# Fit the OLS regression model
print(model_standardized.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.903
Model:                            OLS   Adj. R-squared:                  0.903
Method:                 Least Squares   F-statistic:                 1.158e+04
Date:                Mon, 20 May 2024   Prob (F-statistic):               0.00
Time:                        09:59:08   Log-Likelihood:                -310.05
No. Observations:                1240   AIC:                             624.1
Df Residuals:                    1238   BIC:                             634.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const           -9.454e-17      0.009  -1.

## Paper Findings: VIF

In [31]:
predictors_df_2 = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','BURGLARY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','FELONY ASSAULT_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1', 'ROBBERY_pct_lag1', 'Budget_pct_lag1','Population_Year_pct']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                                   Variable        VIF
0                    GRAND LARCENY_pct_lag1   7.986383
1                         BURGLARY_pct_lag1  17.792775
2   GRAND LARCENY OF MOTOR VEHICLE_pct_lag1  12.042410
3                   FELONY ASSAULT_pct_lag1  33.215779
4  MURDER & NON NEGL. MANSLAUGHTER_pct_lag1   6.617555
5                             RAPE_pct_lag1  13.569439
6                          ROBBERY_pct_lag1  33.704319
7                           Budget_pct_lag1  18.737731
8                       Population_Year_pct  17.850519


In [32]:
predictors_df_2 = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','BURGLARY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','FELONY ASSAULT_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1', 'Budget_pct_lag1','Population_Year_pct']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                                   Variable        VIF
0                    GRAND LARCENY_pct_lag1   7.600391
1                         BURGLARY_pct_lag1  16.434235
2   GRAND LARCENY OF MOTOR VEHICLE_pct_lag1  11.837146
3                   FELONY ASSAULT_pct_lag1  19.141821
4  MURDER & NON NEGL. MANSLAUGHTER_pct_lag1   6.601833
5                             RAPE_pct_lag1  13.121020
6                           Budget_pct_lag1  18.703145
7                       Population_Year_pct  17.375890


In [33]:
predictors_df_2 = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','BURGLARY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1', 'Budget_pct_lag1','Population_Year_pct']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                                   Variable        VIF
0                    GRAND LARCENY_pct_lag1   7.579287
1                         BURGLARY_pct_lag1  16.015216
2   GRAND LARCENY OF MOTOR VEHICLE_pct_lag1  11.486106
3  MURDER & NON NEGL. MANSLAUGHTER_pct_lag1   4.944485
4                             RAPE_pct_lag1   9.854419
5                           Budget_pct_lag1  17.107417
6                       Population_Year_pct  16.253908


In [34]:
predictors_df_2 = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','BURGLARY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1', 'Population_Year_pct']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                                   Variable        VIF
0                    GRAND LARCENY_pct_lag1   6.410459
1                         BURGLARY_pct_lag1  15.808650
2   GRAND LARCENY OF MOTOR VEHICLE_pct_lag1  11.114968
3  MURDER & NON NEGL. MANSLAUGHTER_pct_lag1   4.351979
4                             RAPE_pct_lag1   8.625091
5                       Population_Year_pct  14.031784


In [35]:
predictors_df_2 = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1', 'Population_Year_pct']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                                   Variable        VIF
0                    GRAND LARCENY_pct_lag1   5.340214
1   GRAND LARCENY OF MOTOR VEHICLE_pct_lag1   9.486907
2  MURDER & NON NEGL. MANSLAUGHTER_pct_lag1   4.250275
3                             RAPE_pct_lag1   8.444362
4                       Population_Year_pct  12.942593


In [36]:
predictors_df_2 = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                                   Variable       VIF
0                    GRAND LARCENY_pct_lag1  3.188364
1   GRAND LARCENY OF MOTOR VEHICLE_pct_lag1  5.426184
2  MURDER & NON NEGL. MANSLAUGHTER_pct_lag1  4.180618
3                             RAPE_pct_lag1  8.294728


In [37]:
X = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']
X = sm.add_constant(filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1']])

# Fit the OLS regression model
model_4 = sm.OLS(y, X).fit()
print(model_4.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.593
Model:                            OLS   Adj. R-squared:                  0.592
Method:                 Least Squares   F-statistic:                     600.4
Date:                Mon, 20 May 2024   Prob (F-statistic):          1.06e-240
Time:                        09:59:08   Log-Likelihood:                 5874.4
No. Observations:                1240   AIC:                        -1.174e+04
Df Residuals:                    1236   BIC:                        -1.172e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                               coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------

In [38]:
X = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']
X = sm.add_constant(filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1']])

# Fit the OLS regression model
model_4 = sm.OLS(y, X).fit()
print(model_4.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.594
Model:                            OLS   Adj. R-squared:                  0.593
Method:                 Least Squares   F-statistic:                     451.9
Date:                Mon, 20 May 2024   Prob (F-statistic):          5.68e-240
Time:                        09:59:08   Log-Likelihood:                 5876.0
No. Observations:                1240   AIC:                        -1.174e+04
Df Residuals:                    1235   BIC:                        -1.172e+04
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                                               coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------

## Paper Findings: Index Crime

In [39]:


# Assuming 'filtered_pre_ts_analysis' is your DataFrame with proper columns.
X = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1', 'MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']

# Standardize the features
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)
X_standardized = pd.DataFrame(X_standardized, columns=X.columns, index=X.index)  # Match the indices

# Standardize the dependent variable
y_mean = y.mean()
y_std = y.std()
y_standardized = (y - y_mean) / y_std
y_standardized.index = X.index  # Ensure the indices are the same

# Add a constant to the model (intercept)
X_standardized = sm.add_constant(X_standardized)  # This will now retain the column names and index

# Fit the OLS model with standardized variables
model_standardized = sm.OLS(y_standardized, X_standardized).fit()

# Fit the OLS regression model
print(model_standardized.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.594
Model:                            OLS   Adj. R-squared:                  0.593
Method:                 Least Squares   F-statistic:                     451.9
Date:                Mon, 20 May 2024   Prob (F-statistic):          5.68e-240
Time:                        09:59:08   Log-Likelihood:                -1199.9
No. Observations:                1240   AIC:                             2410.
Df Residuals:                    1235   BIC:                             2435.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                                               coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------

In [40]:
X = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']

# Standardize the features
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)

# Standardize the dependent variable
y_standardized = (y - y.mean()) / y.std()

# Add a constant to the model (intercept)
X_standardized = sm.add_constant(X_standardized)

# Fit the OLS model with standardized variables
model_standardized = sm.OLS(y_standardized, X_standardized).fit()

# Fit the OLS regression model
print(model_standardized.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.521
Model:                            OLS   Adj. R-squared:                  0.520
Method:                 Least Squares   F-statistic:                     448.4
Date:                Mon, 20 May 2024   Prob (F-statistic):          4.59e-197
Time:                        09:59:08   Log-Likelihood:                -1302.4
No. Observations:                1240   AIC:                             2613.
Df Residuals:                    1236   BIC:                             2633.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      -9.454e-17      0.020  -4.81e-15      1.0