In [1]:
from statsmodels.stats.outliers_influence import variance_inflation_factor
import pandas as pd
import math
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from statsmodels.tools.tools import add_constant
import matplotlib.pyplot as plt
import seaborn as sns
import seaborn as snsbb
from scipy.stats import t
from scipy.stats import norm
from linearmodels import PanelOLS
from linearmodels.panel import compare
import geopandas as gpd



In [2]:
pre_ts_analysis = pd.read_csv('Research_Firearm/pre_ts_analysis_clean.csv')



In [3]:
pre_ts_analysis.columns

Index(['Year', 'Precinct', 'Full Time Positions', 'Budget', 'Borough',
       'MURDER & NON NEGL. MANSLAUGHTER', 'RAPE', 'ROBBERY', 'FELONY ASSAULT',
       'BURGLARY', 'GRAND LARCENY', 'GRAND LARCENY OF MOTOR VEHICLE',
       'TOTAL SEVEN MAJOR FELONY OFFENSES', 'Population_Year'],
      dtype='object')

In [4]:
pre_ts_analysis

Unnamed: 0,Year,Precinct,Full Time Positions,Budget,Borough,MURDER & NON NEGL. MANSLAUGHTER,RAPE,ROBBERY,FELONY ASSAULT,BURGLARY,GRAND LARCENY,GRAND LARCENY OF MOTOR VEHICLE,TOTAL SEVEN MAJOR FELONY OFFENSES,Population_Year
0,2006,1,219,11001943,MANHATTAN SOUTH,1,4,119,94,255,1462,78,2013,59431
1,2006,5,240,12354423,MANHATTAN SOUTH,2,5,132,104,152,605,40,1040,53356
2,2006,6,237,10716126,MANHATTAN SOUTH,3,6,214,123,280,1283,70,1979,61259
3,2006,7,174,7786080,MANHATTAN SOUTH,4,7,176,105,127,362,83,864,55703
4,2006,9,234,10190005,MANHATTAN SOUTH,1,14,252,165,297,775,75,1579,76639
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1308,2023,114,252,20752787,QUEENS NORTH,8,40,279,509,224,920,430,2410,210231
1309,2023,115,289,16525730,QUEENS NORTH,4,27,365,481,136,863,337,2213,181382
1310,2023,120,399,29273345,STATEN ISLAND,12,19,163,470,142,414,156,1376,125104
1311,2023,122,249,19873478,STATEN ISLAND,2,8,50,169,100,474,109,912,146208


## Define Violent and Crime, also calculate the percentage based crime and budget allocation on precinct level

In [5]:
# Create 'Violent Crime' column
pre_ts_analysis['Violent Crime'] = pre_ts_analysis['FELONY ASSAULT'] + pre_ts_analysis['MURDER & NON NEGL. MANSLAUGHTER'] + pre_ts_analysis['RAPE'] + pre_ts_analysis['ROBBERY']
# Create 'Property Crime' column
pre_ts_analysis['Property Crime'] = pre_ts_analysis['GRAND LARCENY'] + pre_ts_analysis['GRAND LARCENY OF MOTOR VEHICLE'] + pre_ts_analysis['BURGLARY']

In [6]:
# Calculate yearly totals
yearly_totals = pre_ts_analysis.groupby('Year').sum().reset_index()
# List of metrics for which want to calculate yearly totals
metrics = ['Violent Crime', 'Property Crime', 'Full Time Positions']
# Rename columns to *_by_year
yearly_totals = yearly_totals.rename(columns={metric: f"{metric.lower()}_by_year" for metric in metrics})
# Merge yearly totals with merged_data
merged_data = pd.merge(pre_ts_analysis, yearly_totals[['Year'] + [f"{metric.lower()}_by_year" for metric in metrics]], 
                       on='Year', how='left')

# List of metrics for which to calculate yearly totals and per capita rates
metrics = ['Violent Crime', 'Property Crime','Full Time Positions']
# Loop through each metric
for metric in metrics:
    # Compute the per capita rate
    per_capita_col_name = f"{metric}_per_capita"
    merged_data[per_capita_col_name] = merged_data[metric] / merged_data['Population_Year']    
    # Compute the percentage-based rate
    pct_col_name = f"{metric}_pct"
    yearly_col_name = f"{metric.lower()}_by_year"
    merged_data[pct_col_name] = merged_data[metric] / merged_data[yearly_col_name]

In [7]:
pre_ts_analysis = merged_data

In [8]:
# Calculate yearly totals
yearly_totals = pre_ts_analysis.groupby('Year').sum().reset_index()
# List of metrics for which want to calculate yearly totals
metrics = ['MURDER & NON NEGL. MANSLAUGHTER', 'RAPE', 'ROBBERY', 'FELONY ASSAULT', 
    'BURGLARY', 'GRAND LARCENY', 'GRAND LARCENY OF MOTOR VEHICLE', 
    'TOTAL SEVEN MAJOR FELONY OFFENSES', 'Budget']
# Rename columns to *_by_year
yearly_totals = yearly_totals.rename(columns={metric: f"{metric.lower()}_by_year" for metric in metrics})
# Merge yearly totals with merged_data
merged_data = pd.merge(pre_ts_analysis, yearly_totals[['Year'] + [f"{metric.lower()}_by_year" for metric in metrics]], 
                       on='Year', how='left')

# List of metrics for which to calculate yearly totals and per capita rates
metrics = ['MURDER & NON NEGL. MANSLAUGHTER', 'RAPE', 'ROBBERY', 'FELONY ASSAULT', 
    'BURGLARY', 'GRAND LARCENY', 'GRAND LARCENY OF MOTOR VEHICLE', 
    'TOTAL SEVEN MAJOR FELONY OFFENSES', 'Budget']
# Loop through each metric
for metric in metrics:
    # Compute the per capita rate
    per_capita_col_name = f"{metric}_per_capita"
    merged_data[per_capita_col_name] = merged_data[metric] / merged_data['Population_Year']    
    # Compute the percentage-based rate
    pct_col_name = f"{metric}_pct"
    yearly_col_name = f"{metric.lower()}_by_year"
    merged_data[pct_col_name] = merged_data[metric] / merged_data[yearly_col_name]

In [9]:
pre_ts_analysis = merged_data

In [10]:
pre_ts_analysis

Unnamed: 0,Year,Precinct,Full Time Positions,Budget,Borough,MURDER & NON NEGL. MANSLAUGHTER,RAPE,ROBBERY,FELONY ASSAULT,BURGLARY,...,BURGLARY_per_capita,BURGLARY_pct,GRAND LARCENY_per_capita,GRAND LARCENY_pct,GRAND LARCENY OF MOTOR VEHICLE_per_capita,GRAND LARCENY OF MOTOR VEHICLE_pct,TOTAL SEVEN MAJOR FELONY OFFENSES_per_capita,TOTAL SEVEN MAJOR FELONY OFFENSES_pct,Budget_per_capita,Budget_pct
0,2006,1,219,11001943,MANHATTAN SOUTH,1,4,119,94,255,...,0.004291,0.011469,0.024600,0.035392,0.001312,0.004985,0.033871,0.016574,185.121284,0.014186
1,2006,5,240,12354423,MANHATTAN SOUTH,2,5,132,104,152,...,0.002849,0.006836,0.011339,0.014646,0.000750,0.002557,0.019492,0.008563,231.547024,0.015929
2,2006,6,237,10716126,MANHATTAN SOUTH,3,6,214,123,280,...,0.004571,0.012593,0.020944,0.031059,0.001143,0.004474,0.032305,0.016294,174.931455,0.013817
3,2006,7,174,7786080,MANHATTAN SOUTH,4,7,176,105,127,...,0.002280,0.005712,0.006499,0.008763,0.001490,0.005305,0.015511,0.007114,139.778468,0.010039
4,2006,9,234,10190005,MANHATTAN SOUTH,1,14,252,165,297,...,0.003875,0.013358,0.010112,0.018761,0.000979,0.004794,0.020603,0.013000,132.961090,0.013139
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1308,2023,114,252,20752787,QUEENS NORTH,8,40,279,509,224,...,0.001065,0.017165,0.004376,0.019979,0.002045,0.027816,0.011464,0.020210,98.714210,0.016160
1309,2023,115,289,16525730,QUEENS NORTH,4,27,365,481,136,...,0.000750,0.010421,0.004758,0.018741,0.001858,0.021800,0.012201,0.018558,91.110088,0.012868
1310,2023,120,399,29273345,STATEN ISLAND,12,19,163,470,142,...,0.001135,0.010881,0.003309,0.008991,0.001247,0.010091,0.010999,0.011539,233.992079,0.022794
1311,2023,122,249,19873478,STATEN ISLAND,2,8,50,169,100,...,0.000684,0.007663,0.003242,0.010294,0.000746,0.007051,0.006238,0.007648,135.926064,0.015475


In [11]:
pre_ts_analysis.groupby('Year')['Violent Crime_pct'].sum()

Year
2006    1.0
2007    1.0
2008    1.0
2009    1.0
2010    1.0
2011    1.0
2012    1.0
2013    1.0
2014    1.0
2015    1.0
2016    1.0
2017    1.0
2018    1.0
2019    1.0
2020    1.0
2021    1.0
2022    1.0
2023    1.0
Name: Violent Crime_pct, dtype: float64

## Create lag 1 for meature previous one year for crime and budget variable

In [12]:
# Get all columns ending with '_pct' or '_per_capita'
cols_to_lag = [col for col in pre_ts_analysis.columns if col.endswith('_pct') or col.endswith('_per_capita')]

# Loop through the columns and create a lag-1 column for each
for col in cols_to_lag:
    lag_col_name = col + '_lag1'  # Name of the new lag column
    pre_ts_analysis[lag_col_name] = pre_ts_analysis.groupby('Precinct')[col].shift(1)

In [13]:
pre_ts_analysis.columns

Index(['Year', 'Precinct', 'Full Time Positions', 'Budget', 'Borough',
       'MURDER & NON NEGL. MANSLAUGHTER', 'RAPE', 'ROBBERY', 'FELONY ASSAULT',
       'BURGLARY', 'GRAND LARCENY', 'GRAND LARCENY OF MOTOR VEHICLE',
       'TOTAL SEVEN MAJOR FELONY OFFENSES', 'Population_Year', 'Violent Crime',
       'Property Crime', 'violent crime_by_year', 'property crime_by_year',
       'full time positions_by_year', 'Violent Crime_per_capita',
       'Violent Crime_pct', 'Property Crime_per_capita', 'Property Crime_pct',
       'Full Time Positions_per_capita', 'Full Time Positions_pct',
       'murder & non negl. manslaughter_by_year', 'rape_by_year',
       'robbery_by_year', 'felony assault_by_year', 'burglary_by_year',
       'grand larceny_by_year', 'grand larceny of motor vehicle_by_year',
       'total seven major felony offenses_by_year', 'budget_by_year',
       'MURDER & NON NEGL. MANSLAUGHTER_per_capita',
       'MURDER & NON NEGL. MANSLAUGHTER_pct', 'RAPE_per_capita', 'RAPE_pct',

# Paper Analysis

In [14]:
# Creating a lagged version of the Violent and Property Crime

# Defining the regression formula
regression_formula = "Budget_per_capita ~ Q('Violent Crime_per_capita_lag1') + Q('Property Crime_per_capita_lag1') + Budget_per_capita_lag1"

# Running the regression
model = smf.ols(formula=regression_formula, data=pre_ts_analysis).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:      Budget_per_capita   R-squared:                       0.941
Model:                            OLS   Adj. R-squared:                  0.941
Method:                 Least Squares   F-statistic:                     6542.
Date:                Thu, 16 May 2024   Prob (F-statistic):               0.00
Time:                        10:04:34   Log-Likelihood:                -5128.6
No. Observations:                1240   AIC:                         1.027e+04
Df Residuals:                    1236   BIC:                         1.029e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                          coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------------------
In

## Paper findings: Vio+Property

In [15]:
# Creating a lagged version of the Violent and Property Crime pct

# Defining the regression formula
regression_formula = "Budget_pct ~ Q('Violent Crime_pct_lag1') + Q('Property Crime_pct_lag1') + Budget_pct_lag1"

# Running the regression
model = smf.ols(formula=regression_formula, data=pre_ts_analysis).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.908
Model:                            OLS   Adj. R-squared:                  0.907
Method:                 Least Squares   F-statistic:                     4045.
Date:                Thu, 16 May 2024   Prob (F-statistic):               0.00
Time:                        10:04:34   Log-Likelihood:                 6793.4
No. Observations:                1240   AIC:                        -1.358e+04
Df Residuals:                    1236   BIC:                        -1.356e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Intercept       

In [16]:
filtered_pre_ts_analysis = pre_ts_analysis[(pre_ts_analysis['Year'] >= 2007) & (pre_ts_analysis['Year'] <= 2023)]


In [17]:
# Reset display options to default
pd.reset_option('display.max_rows')
pd.reset_option('display.max_columns')

## Paper findings: Vio+Property S

In [28]:



# Assuming 'filtered_pre_ts_analysis' is your DataFrame with proper columns.
X = filtered_pre_ts_analysis[['Violent Crime_pct_lag1', 'Property Crime_pct_lag1', 'Budget_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']

# Standardize the features
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)
X_standardized = pd.DataFrame(X_standardized, columns=X.columns, index=X.index)  # Match the indices

# Standardize the dependent variable
y_mean = y.mean()
y_std = y.std()
y_standardized = (y - y_mean) / y_std
y_standardized.index = X.index  # Ensure the indices are the same

# Add a constant to the model (intercept)
X_standardized = sm.add_constant(X_standardized)  # This will now retain the column names and index

# Fit the OLS model with standardized variables
model_standardized = sm.OLS(y_standardized, X_standardized).fit()

# Fit the OLS regression model
print(model_standardized.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.908
Model:                            OLS   Adj. R-squared:                  0.907
Method:                 Least Squares   F-statistic:                     4045.
Date:                Thu, 16 May 2024   Prob (F-statistic):               0.00
Time:                        10:06:06   Log-Likelihood:                -282.62
No. Observations:                1240   AIC:                             573.2
Df Residuals:                    1236   BIC:                             593.7
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
const                   -9

## Paper Findings: VIF

In [19]:
predictors_df_2 = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','BURGLARY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','FELONY ASSAULT_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1', 'ROBBERY_pct_lag1', 'Budget_pct_lag1']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                                   Variable        VIF
0                    GRAND LARCENY_pct_lag1   7.528083
1                         BURGLARY_pct_lag1  16.414193
2   GRAND LARCENY OF MOTOR VEHICLE_pct_lag1   8.663947
3                   FELONY ASSAULT_pct_lag1  32.980409
4  MURDER & NON NEGL. MANSLAUGHTER_pct_lag1   6.563781
5                             RAPE_pct_lag1  13.413026
6                          ROBBERY_pct_lag1  32.808150
7                           Budget_pct_lag1  15.690278


In [20]:
predictors_df_2 = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','BURGLARY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1', 'ROBBERY_pct_lag1', 'Budget_pct_lag1']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                                   Variable        VIF
0                    GRAND LARCENY_pct_lag1   7.198934
1                         BURGLARY_pct_lag1  16.279178
2   GRAND LARCENY OF MOTOR VEHICLE_pct_lag1   8.663947
3  MURDER & NON NEGL. MANSLAUGHTER_pct_lag1   5.720214
4                             RAPE_pct_lag1  12.592054
5                          ROBBERY_pct_lag1  17.812288
6                           Budget_pct_lag1  14.813462


In [21]:
predictors_df_2 = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','BURGLARY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1', 'Budget_pct_lag1']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                                   Variable        VIF
0                    GRAND LARCENY_pct_lag1   7.167830
1                         BURGLARY_pct_lag1  15.273549
2   GRAND LARCENY OF MOTOR VEHICLE_pct_lag1   8.619399
3  MURDER & NON NEGL. MANSLAUGHTER_pct_lag1   4.581264
4                             RAPE_pct_lag1   9.823855
5                           Budget_pct_lag1  14.768607


In [22]:
predictors_df_2 = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1', 'Budget_pct_lag1']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                                   Variable        VIF
0                    GRAND LARCENY_pct_lag1   6.026963
1   GRAND LARCENY OF MOTOR VEHICLE_pct_lag1   5.584618
2  MURDER & NON NEGL. MANSLAUGHTER_pct_lag1   4.570835
3                             RAPE_pct_lag1   9.753610
4                           Budget_pct_lag1  14.099469


In [23]:
predictors_df_2 = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1']]
# Add a constant to the model (intercept)

# Calculate VIF for each predictor variable
vif = pd.DataFrame()
vif["Variable"] = predictors_df_2.columns
vif["VIF"] = [variance_inflation_factor(predictors_df_2.values, i) for i in range(predictors_df_2.shape[1])]

print(vif)

                                   Variable       VIF
0                    GRAND LARCENY_pct_lag1  3.188364
1   GRAND LARCENY OF MOTOR VEHICLE_pct_lag1  5.426184
2  MURDER & NON NEGL. MANSLAUGHTER_pct_lag1  4.180618
3                             RAPE_pct_lag1  8.294728


In [24]:
X = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']
X = sm.add_constant(filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1']])

# Fit the OLS regression model
model_4 = sm.OLS(y, X).fit()
print(model_4.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.593
Model:                            OLS   Adj. R-squared:                  0.592
Method:                 Least Squares   F-statistic:                     600.4
Date:                Thu, 16 May 2024   Prob (F-statistic):          1.06e-240
Time:                        10:04:35   Log-Likelihood:                 5874.4
No. Observations:                1240   AIC:                        -1.174e+04
Df Residuals:                    1236   BIC:                        -1.172e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                               coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------

In [25]:
X = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']
X = sm.add_constant(filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1']])

# Fit the OLS regression model
model_4 = sm.OLS(y, X).fit()
print(model_4.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.594
Model:                            OLS   Adj. R-squared:                  0.593
Method:                 Least Squares   F-statistic:                     451.9
Date:                Thu, 16 May 2024   Prob (F-statistic):          5.68e-240
Time:                        10:04:35   Log-Likelihood:                 5876.0
No. Observations:                1240   AIC:                        -1.174e+04
Df Residuals:                    1235   BIC:                        -1.172e+04
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                                               coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------

## Paper Findings: Index Crime

In [26]:


# Assuming 'filtered_pre_ts_analysis' is your DataFrame with proper columns.
X = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1', 'MURDER & NON NEGL. MANSLAUGHTER_pct_lag1','RAPE_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']

# Standardize the features
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)
X_standardized = pd.DataFrame(X_standardized, columns=X.columns, index=X.index)  # Match the indices

# Standardize the dependent variable
y_mean = y.mean()
y_std = y.std()
y_standardized = (y - y_mean) / y_std
y_standardized.index = X.index  # Ensure the indices are the same

# Add a constant to the model (intercept)
X_standardized = sm.add_constant(X_standardized)  # This will now retain the column names and index

# Fit the OLS model with standardized variables
model_standardized = sm.OLS(y_standardized, X_standardized).fit()

# Fit the OLS regression model
print(model_standardized.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.594
Model:                            OLS   Adj. R-squared:                  0.593
Method:                 Least Squares   F-statistic:                     451.9
Date:                Thu, 16 May 2024   Prob (F-statistic):          5.68e-240
Time:                        10:04:35   Log-Likelihood:                -1199.9
No. Observations:                1240   AIC:                             2410.
Df Residuals:                    1235   BIC:                             2435.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                                               coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------

In [27]:
X = filtered_pre_ts_analysis[['GRAND LARCENY_pct_lag1','GRAND LARCENY OF MOTOR VEHICLE_pct_lag1','MURDER & NON NEGL. MANSLAUGHTER_pct_lag1']]
y = filtered_pre_ts_analysis['Budget_pct']

# Standardize the features
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)

# Standardize the dependent variable
y_standardized = (y - y.mean()) / y.std()

# Add a constant to the model (intercept)
X_standardized = sm.add_constant(X_standardized)

# Fit the OLS model with standardized variables
model_standardized = sm.OLS(y_standardized, X_standardized).fit()

# Fit the OLS regression model
print(model_standardized.summary())

                            OLS Regression Results                            
Dep. Variable:             Budget_pct   R-squared:                       0.521
Model:                            OLS   Adj. R-squared:                  0.520
Method:                 Least Squares   F-statistic:                     448.4
Date:                Thu, 16 May 2024   Prob (F-statistic):          4.59e-197
Time:                        10:04:35   Log-Likelihood:                -1302.4
No. Observations:                1240   AIC:                             2613.
Df Residuals:                    1236   BIC:                             2633.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      -9.454e-17      0.020  -4.81e-15      1.0