In [1]:
import numpy as np
import pandas as pd
import dill as dl
import matplotlib.pyplot as plt
import sys
import pickle
import itertools
import re
import os
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as sm_f

In [2]:
pd.set_option("max_columns", None)
pd.set_option("max_rows", None)

# >= 50 specification

In [3]:
with open("D:/DZ/Course_6/Diploma/Data/two_time_points_50.csv", "rb") as inp:
    two_time_points = pd.read_csv(inp, index_col=0)

In [4]:
treat_1_roads = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_roads"]].copy()
treat_1_roads.dropna(inplace=True)

roads = sm_f.ols(formula="bad_roads ~ treatment_period*treatment_1 + C(region)", data=treat_1_roads).fit(cov_type="HC3")
roads.summary()

0,1,2,3
Dep. Variable:,bad_roads,R-squared:,0.334
Model:,OLS,Adj. R-squared:,0.304
Method:,Least Squares,F-statistic:,10.64
Date:,"Wed, 10 May 2023",Prob (F-statistic):,2e-13
Time:,21:37:36,Log-Likelihood:,-932.96
No. Observations:,209,AIC:,1886.0
Df Residuals:,199,BIC:,1919.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,55.9316,3.935,14.215,0.000,48.220,63.643
C(region)[T.Мурманская область],-18.4299,6.175,-2.984,0.003,-30.533,-6.327
C(region)[T.Новгородская область],-1.7230,5.024,-0.343,0.732,-11.570,8.124
C(region)[T.Республика Алтай],23.3910,6.009,3.892,0.000,11.613,35.169
C(region)[T.Республика Марий Эл],19.5901,4.623,4.238,0.000,10.529,28.651
C(region)[T.Тульская область],-6.2783,4.987,-1.259,0.208,-16.053,3.497
C(region)[T.Хабаровский край],-22.7844,6.221,-3.663,0.000,-34.977,-10.592
treatment_period,-1.8380,3.312,-0.555,0.579,-8.330,4.654
treatment_1,8.4229,6.764,1.245,0.213,-4.834,21.680

0,1,2,3
Omnibus:,0.847,Durbin-Watson:,1.436
Prob(Omnibus):,0.655,Jarque-Bera (JB):,0.607
Skew:,-0.119,Prob(JB):,0.738
Kurtosis:,3.113,Cond. No.,9.4


In [5]:
treat_1_roads = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_roads"]].copy()
treat_1_roads.dropna(inplace=True)

roads = sm_f.ols(formula="bad_roads ~ treatment_period*treatment_1", data=treat_1_roads).fit(cov_type="cluster", cov_kwds={'groups': treat_1_roads['region']})
roads.summary()

0,1,2,3
Dep. Variable:,bad_roads,R-squared:,0.01
Model:,OLS,Adj. R-squared:,-0.005
Method:,Least Squares,F-statistic:,10.32
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.00878
Time:,21:37:36,Log-Likelihood:,-974.35
No. Observations:,209,AIC:,1957.0
Df Residuals:,205,BIC:,1970.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,56.8545,6.133,9.271,0.000,44.835,68.874
treatment_period,-3.8821,1.588,-2.444,0.015,-6.995,-0.769
treatment_1,6.1475,5.626,1.093,0.274,-4.878,17.173
treatment_period:treatment_1,-1.8754,1.959,-0.957,0.338,-5.715,1.964

0,1,2,3
Omnibus:,26.823,Durbin-Watson:,0.964
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8.938
Skew:,-0.21,Prob(JB):,0.0115
Kurtosis:,2.078,Cond. No.,8.95


In [6]:
treat_1_schools = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_schools"]].copy()
treat_1_schools.dropna(inplace=True)

schools = sm_f.ols(formula="bad_schools ~ treatment_period*treatment_1", data=treat_1_schools).fit(cov_type="cluster", cov_kwds={'groups': treat_1_schools['region']})
schools.summary()

0,1,2,3
Dep. Variable:,bad_schools,R-squared:,0.055
Model:,OLS,Adj. R-squared:,0.037
Method:,Least Squares,F-statistic:,2.144
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.183
Time:,21:37:36,Log-Likelihood:,-636.38
No. Observations:,158,AIC:,1281.0
Df Residuals:,154,BIC:,1293.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3.1609,1.389,2.276,0.023,0.439,5.882
treatment_period,6.0677,5.287,1.148,0.251,-4.296,16.431
treatment_1,2.6502,3.139,0.844,0.399,-3.502,8.803
treatment_period:treatment_1,1.6587,6.753,0.246,0.806,-11.577,14.894

0,1,2,3
Omnibus:,135.939,Durbin-Watson:,2.099
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1510.905
Skew:,3.204,Prob(JB):,0.0
Kurtosis:,16.727,Cond. No.,7.97


In [7]:
treat_1_schools = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_schools"]].copy()
treat_1_schools.dropna(inplace=True)

schools = sm_f.ols(formula="bad_schools ~ treatment_period*treatment_1 + C(region)", data=treat_1_schools).fit(cov_type="HC3")
schools.summary()

0,1,2,3
Dep. Variable:,bad_schools,R-squared:,0.275
Model:,OLS,Adj. R-squared:,0.226
Method:,Least Squares,F-statistic:,1.229
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.277
Time:,21:37:36,Log-Likelihood:,-615.42
No. Observations:,158,AIC:,1253.0
Df Residuals:,147,BIC:,1287.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,32.0441,20.132,1.592,0.111,-7.413,71.501
C(region)[T.Мурманская область],-36.7244,24.453,-1.502,0.133,-84.651,11.202
C(region)[T.Новгородская область],-28.5872,20.334,-1.406,0.160,-68.441,11.266
C(region)[T.Приморский край],-28.4326,20.443,-1.391,0.164,-68.501,11.635
C(region)[T.Республика Алтай],-28.2025,20.933,-1.347,0.178,-69.230,12.825
C(region)[T.Республика Марий Эл],-34.5247,20.651,-1.672,0.095,-75.000,5.951
C(region)[T.Тульская область],-33.3692,20.278,-1.646,0.100,-73.113,6.375
C(region)[T.Хабаровский край],-10.3775,26.601,-0.390,0.696,-62.514,41.759
treatment_period,7.5803,3.039,2.494,0.013,1.624,13.536

0,1,2,3
Omnibus:,79.04,Durbin-Watson:,2.496
Prob(Omnibus):,0.0,Jarque-Bera (JB):,324.513
Skew:,1.902,Prob(JB):,3.41e-71
Kurtosis:,8.9,Cond. No.,20.3


In [8]:
treat_1_sewerage = two_time_points[["raion", "region", "treatment_1", "treatment_period", "sewage_grid_fixed"]].copy()
treat_1_sewerage.dropna(inplace=True)

sewerage = sm_f.ols(formula="sewage_grid_fixed ~ treatment_period*treatment_1", data=treat_1_sewerage).fit(cov_type="cluster", cov_kwds={'groups': treat_1_sewerage['region']})
sewerage.summary()

0,1,2,3
Dep. Variable:,sewage_grid_fixed,R-squared:,0.029
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,0.9893
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.459
Time:,21:37:36,Log-Likelihood:,159.88
No. Observations:,103,AIC:,-311.8
Df Residuals:,99,BIC:,-301.2
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0164,0.005,3.443,0.001,0.007,0.026
treatment_period,0.0063,0.012,0.536,0.592,-0.017,0.029
treatment_1,0.0519,0.032,1.645,0.100,-0.010,0.114
treatment_period:treatment_1,-0.0615,0.036,-1.723,0.085,-0.132,0.008

0,1,2,3
Omnibus:,147.298,Durbin-Watson:,2.192
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4980.436
Skew:,5.213,Prob(JB):,0.0
Kurtosis:,35.431,Cond. No.,10.8


In [9]:
treat_1_sewerage = two_time_points[["raion", "region", "treatment_1", "treatment_period", "sewage_grid_fixed"]].copy()
treat_1_sewerage.dropna(inplace=True)

sewerage = sm_f.ols(formula="sewage_grid_fixed ~ treatment_period*treatment_1 + C(region)", data=treat_1_sewerage).fit(cov_type="HC3")
sewerage.summary()

0,1,2,3
Dep. Variable:,sewage_grid_fixed,R-squared:,0.052
Model:,OLS,Adj. R-squared:,-0.04
Method:,Least Squares,F-statistic:,0.3392
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.96
Time:,21:37:36,Log-Likelihood:,161.1
No. Observations:,103,AIC:,-302.2
Df Residuals:,93,BIC:,-275.8
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0097,0.009,1.133,0.257,-0.007,0.026
C(region)[T.Мурманская область],0.0208,0.031,0.680,0.496,-0.039,0.081
C(region)[T.Новгородская область],-0.0022,0.010,-0.231,0.818,-0.021,0.017
C(region)[T.Приморский край],0.0105,0.014,0.749,0.454,-0.017,0.038
C(region)[T.Республика Алтай],-0.0077,0.227,-0.034,0.973,-0.452,0.437
C(region)[T.Тульская область],-0.0029,0.009,-0.312,0.755,-0.021,0.015
C(region)[T.Хабаровский край],0.0071,0.009,0.798,0.425,-0.010,0.025
treatment_period,0.0072,0.012,0.620,0.535,-0.016,0.030
treatment_1,0.0524,0.080,0.652,0.514,-0.105,0.210

0,1,2,3
Omnibus:,140.135,Durbin-Watson:,2.179
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4052.556
Skew:,4.863,Prob(JB):,0.0
Kurtosis:,32.15,Cond. No.,14.6


In [10]:
treat_1_heat = two_time_points[["raion", "region", "treatment_1", "treatment_period", "heat_grid_fixed"]].copy()
treat_1_heat.dropna(inplace=True)

heat = sm_f.ols(formula="heat_grid_fixed ~ treatment_period*treatment_1", data=treat_1_heat).fit(cov_type="HC3") # т.к. в выборке только один регион
heat.summary()

0,1,2,3
Dep. Variable:,heat_grid_fixed,R-squared:,0.027
Model:,OLS,Adj. R-squared:,-0.033
Method:,Least Squares,F-statistic:,0.3548
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.786
Time:,21:37:36,Log-Likelihood:,126.45
No. Observations:,53,AIC:,-244.9
Df Residuals:,49,BIC:,-237.0
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0240,0.003,7.359,0.000,0.018,0.030
treatment_period,0.0070,0.007,0.965,0.334,-0.007,0.021
treatment_1,0.0077,0.030,0.256,0.798,-0.052,0.067
treatment_period:treatment_1,-0.0047,0.042,-0.111,0.911,-0.087,0.078

0,1,2,3
Omnibus:,39.019,Durbin-Watson:,1.722
Prob(Omnibus):,0.0,Jarque-Bera (JB):,101.892
Skew:,2.187,Prob(JB):,7.489999999999999e-23
Kurtosis:,8.197,Cond. No.,9.67


In [11]:
treat_1_heat = two_time_points[["raion", "region", "treatment_1", "treatment_period", "heat_grid_fixed"]].copy()
treat_1_heat.dropna(inplace=True)

heat = sm_f.ols(formula="heat_grid_fixed ~ treatment_period*treatment_1 + C(region)", data=treat_1_heat).fit(cov_type="HC3") # т.к. в выборке только один регион
heat.summary()

0,1,2,3
Dep. Variable:,heat_grid_fixed,R-squared:,0.027
Model:,OLS,Adj. R-squared:,-0.033
Method:,Least Squares,F-statistic:,0.3548
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.786
Time:,21:37:36,Log-Likelihood:,126.45
No. Observations:,53,AIC:,-244.9
Df Residuals:,49,BIC:,-237.0
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0240,0.003,7.359,0.000,0.018,0.030
treatment_period,0.0070,0.007,0.965,0.334,-0.007,0.021
treatment_1,0.0077,0.030,0.256,0.798,-0.052,0.067
treatment_period:treatment_1,-0.0047,0.042,-0.111,0.911,-0.087,0.078

0,1,2,3
Omnibus:,39.019,Durbin-Watson:,1.722
Prob(Omnibus):,0.0,Jarque-Bera (JB):,101.892
Skew:,2.187,Prob(JB):,7.489999999999999e-23
Kurtosis:,8.197,Cond. No.,9.67


In [12]:
treat_1_water = two_time_points[["raion", "region", "treatment_1", "treatment_period", "water_grid_fixed"]].copy()
treat_1_water.dropna(inplace=True)

water = sm_f.ols(formula="water_grid_fixed ~ treatment_period*treatment_1", data=treat_1_water).fit(cov_type="cluster", cov_kwds={'groups': treat_1_water['region']})
water.summary()

0,1,2,3
Dep. Variable:,water_grid_fixed,R-squared:,0.008
Model:,OLS,Adj. R-squared:,-0.011
Method:,Least Squares,F-statistic:,1.007
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.462
Time:,21:37:36,Log-Likelihood:,245.99
No. Observations:,167,AIC:,-484.0
Df Residuals:,163,BIC:,-471.5
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0272,0.011,2.386,0.017,0.005,0.050
treatment_period,-0.0085,0.011,-0.804,0.421,-0.029,0.012
treatment_1,-0.0102,0.017,-0.585,0.559,-0.044,0.024
treatment_period:treatment_1,0.0020,0.016,0.124,0.901,-0.029,0.033

0,1,2,3
Omnibus:,323.75,Durbin-Watson:,1.894
Prob(Omnibus):,0.0,Jarque-Bera (JB):,94726.728
Skew:,10.063,Prob(JB):,0.0
Kurtosis:,117.928,Cond. No.,10.1


In [13]:
treat_1_water = two_time_points[["raion", "region", "treatment_1", "treatment_period", "water_grid_fixed"]].copy()
treat_1_water.dropna(inplace=True)

water = sm_f.ols(formula="water_grid_fixed ~ treatment_period*treatment_1 + C(region)", data=treat_1_water).fit(cov_type="HC3")
water.summary()

0,1,2,3
Dep. Variable:,water_grid_fixed,R-squared:,0.072
Model:,OLS,Adj. R-squared:,0.026
Method:,Least Squares,F-statistic:,3.392
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.00127
Time:,21:37:36,Log-Likelihood:,251.64
No. Observations:,167,AIC:,-485.3
Df Residuals:,158,BIC:,-457.2
Df Model:,8,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0189,0.006,3.087,0.002,0.007,0.031
C(region)[T.Мурманская область],0.0397,0.033,1.200,0.230,-0.025,0.105
C(region)[T.Новгородская область],-0.0058,0.005,-1.269,0.205,-0.015,0.003
C(region)[T.Приморский край],0.0037,0.005,0.739,0.460,-0.006,0.014
C(region)[T.Республика Алтай],0.0032,0.007,0.474,0.635,-0.010,0.016
C(region)[T.Хабаровский край],0.0187,0.007,2.607,0.009,0.005,0.033
treatment_period,-0.0080,0.009,-0.865,0.387,-0.026,0.010
treatment_1,-0.0087,0.017,-0.509,0.610,-0.042,0.025
treatment_period:treatment_1,0.0021,0.019,0.111,0.912,-0.035,0.039

0,1,2,3
Omnibus:,315.382,Durbin-Watson:,2.006
Prob(Omnibus):,0.0,Jarque-Bera (JB):,83742.427
Skew:,9.568,Prob(JB):,0.0
Kurtosis:,111.022,Cond. No.,10.7


In [14]:
with open("D:/DZ/Course_6/Diploma/Data/financial_data.csv", "rb") as inp:
    temp_fin = pickle.load(inp)

In [17]:
with open("D:/DZ/Course_6/Diploma/Data/mun_data_50.csv", "rb") as inp:
    data_for_models = pd.read_csv(inp, index_col=0).reset_index(drop=True)

In [18]:
data_for_models.head()

Unnamed: 0,raion,region,treatment_1,treatment_2
0,Владивостокский,Приморский край,0,1
1,Хасанский район,Приморский край,0,1
2,Артемовский,Приморский край,0,0
3,Шкотовский район,Приморский край,0,0
4,Надеждинский район,Приморский край,0,0


In [20]:
mapping = {"Приморский край": 2016, "Мурманская область": 2016, "Костромская область": 2015, "Республика Алтай": 2014, "Республика Марий Эл": 2014, "Хабаровский край": 2014, 
           "Тульская область": 2014, "Новгородская область": 2016}

In [21]:
actual, initial, difference = temp_fin.loc[temp_fin.budget_type == "actual_spending"], temp_fin.loc[temp_fin.budget_type == "initial_budget"], temp_fin.loc[temp_fin.budget_type == "разница"]

In [22]:
actual = pd.pivot(actual, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()
initial = pd.pivot(initial, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()
difference = pd.pivot(difference, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()

In [23]:
for i in actual, initial, difference:
    i.rename(columns={"дорожное хозяйство (дорожные фонды)": "roads", "жилищно-коммунальное хозяйство": "housing", "образование": "education"}, inplace=True)

In [24]:
actual.head()

sub_indicator,region,raion,year,roads,housing,education
0,Костромская область,Антроповский район,2013,666.115702,1610.368144,12882.945154
1,Костромская область,Антроповский район,2014,603.232937,1922.79625,12638.11448
2,Костромская область,Антроповский район,2015,688.90871,95.255228,21901.154847
3,Костромская область,Антроповский район,2016,764.043811,203.425719,10986.12319
4,Костромская область,Антроповский район,2017,,,


In [25]:
frames = [actual, initial, difference]

In [26]:
for i in range(len(frames)):
    temp_fin_data = pd.DataFrame()
    for j in mapping.keys():
        temp_fin_data = temp_fin_data.append(frames[i].loc[(frames[i].region == j) & ((frames[i].year == mapping[j]) | (frames[i].year == mapping[j] + 5))])
    temp_fin_data["treatment_period"] = [1 if i > 2018 else 0 for i in temp_fin_data.year]
    frames[i] = pd.merge(temp_fin_data, data_for_models, on=["region", "raion"], how="inner")

In [24]:
#frames[0]

In [27]:
actual, initial, difference = frames

In [28]:
actual.head()

Unnamed: 0,region,raion,year,roads,housing,education,treatment_period,treatment_1,treatment_2
0,Приморский край,Анучинский район,2016,1915.738966,233.152838,11025.1236,0,0,1
1,Приморский край,Анучинский район,2021,,,,1,0,1
2,Приморский край,Арсеньевский,2016,928.610553,1899.621063,9445.185699,0,0,0
3,Приморский край,Арсеньевский,2021,726.384117,3508.931747,9828.53369,1,0,0
4,Приморский край,Артемовский,2016,59.130107,1628.357748,8157.6816,0,0,0


### Models for actual budgets

In [29]:
actual_roads_data = actual[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
actual_roads_data.dropna(inplace=True)
actual_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=actual_roads_data).fit(cov_type="cluster", cov_kwds={'groups': actual_roads_data['region']})
actual_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.035
Model:,OLS,Adj. R-squared:,0.024
Method:,Least Squares,F-statistic:,20.03
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.000819
Time:,21:39:01,Log-Likelihood:,-2361.3
No. Observations:,271,AIC:,4731.0
Df Residuals:,267,BIC:,4745.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1089.3436,244.049,4.464,0.000,611.017,1567.670
treatment_period,502.6541,276.615,1.817,0.069,-39.501,1044.809
treatment_1,-407.7288,236.409,-1.725,0.085,-871.082,55.625
treatment_period:treatment_1,368.1160,346.606,1.062,0.288,-311.219,1047.451

0,1,2,3
Omnibus:,185.531,Durbin-Watson:,1.599
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1858.647
Skew:,2.701,Prob(JB):,0.0
Kurtosis:,14.637,Cond. No.,9.36


In [30]:
actual_roads_data = actual[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
actual_roads_data.dropna(inplace=True)
actual_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=actual_roads_data).fit(cov_type="HC3")
actual_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.139
Model:,OLS,Adj. R-squared:,0.105
Method:,Least Squares,F-statistic:,4.806
Date:,"Wed, 10 May 2023",Prob (F-statistic):,2.4e-06
Time:,21:39:03,Log-Likelihood:,-2345.9
No. Observations:,271,AIC:,4714.0
Df Residuals:,260,BIC:,4753.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1473.7222,321.474,4.584,0.000,843.644,2103.800
C(region)[T.Мурманская область],517.5118,546.910,0.946,0.344,-554.412,1589.435
C(region)[T.Новгородская область],-1024.6789,362.383,-2.828,0.005,-1734.937,-314.420
C(region)[T.Приморский край],-703.9053,347.983,-2.023,0.043,-1385.940,-21.871
C(region)[T.Республика Алтай],477.6970,504.603,0.947,0.344,-511.307,1466.701
C(region)[T.Республика Марий Эл],-702.1978,381.306,-1.842,0.066,-1449.544,45.149
C(region)[T.Тульская область],-133.0467,417.748,-0.318,0.750,-951.818,685.725
C(region)[T.Хабаровский край],-269.3687,435.848,-0.618,0.537,-1123.614,584.877
treatment_period,453.3323,196.208,2.310,0.021,68.772,837.892

0,1,2,3
Omnibus:,186.249,Durbin-Watson:,1.78
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2086.948
Skew:,2.662,Prob(JB):,0.0
Kurtosis:,15.509,Cond. No.,9.88


In [31]:
actual_housing_data = actual[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
actual_housing_data.dropna(inplace=True)
actual_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=actual_housing_data).fit(cov_type="cluster", cov_kwds={'groups': actual_housing_data['region']})
actual_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.012
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,1.385
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.324
Time:,21:39:06,Log-Likelihood:,-2913.4
No. Observations:,270,AIC:,5835.0
Df Residuals:,266,BIC:,5849.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3346.3044,1885.890,1.774,0.076,-349.971,7042.580
treatment_period,2193.0395,1528.878,1.434,0.151,-803.507,5189.586
treatment_1,-1973.7271,1815.236,-1.087,0.277,-5531.523,1584.069
treatment_period:treatment_1,-1554.6639,1573.771,-0.988,0.323,-4639.198,1529.870

0,1,2,3
Omnibus:,381.61,Durbin-Watson:,1.13
Prob(Omnibus):,0.0,Jarque-Bera (JB):,34128.224
Skew:,6.837,Prob(JB):,0.0
Kurtosis:,56.354,Cond. No.,9.38


In [32]:
actual_housing_data = actual[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
actual_housing_data.dropna(inplace=True)
actual_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=actual_housing_data).fit(cov_type="HC3")
actual_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.256
Model:,OLS,Adj. R-squared:,0.227
Method:,Least Squares,F-statistic:,5.395
Date:,"Wed, 10 May 2023",Prob (F-statistic):,2.97e-07
Time:,21:39:08,Log-Likelihood:,-2875.2
No. Observations:,270,AIC:,5772.0
Df Residuals:,259,BIC:,5812.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1094.1210,778.463,1.405,0.160,-431.639,2619.881
C(region)[T.Мурманская область],1832.4682,894.754,2.048,0.041,78.782,3586.155
C(region)[T.Новгородская область],-1300.6379,476.751,-2.728,0.006,-2235.052,-366.223
C(region)[T.Приморский край],-422.8949,443.887,-0.953,0.341,-1292.897,447.107
C(region)[T.Республика Алтай],729.2484,658.006,1.108,0.268,-560.420,2018.917
C(region)[T.Республика Марий Эл],-603.2599,467.605,-1.290,0.197,-1519.748,313.228
C(region)[T.Тульская область],811.2125,693.630,1.170,0.242,-548.277,2170.702
C(region)[T.Хабаровский край],1.662e+04,4456.053,3.729,0.000,7881.871,2.53e+04
treatment_period,1825.5422,1409.553,1.295,0.195,-937.131,4588.216

0,1,2,3
Omnibus:,359.816,Durbin-Watson:,1.51
Prob(Omnibus):,0.0,Jarque-Bera (JB):,31167.032
Skew:,6.121,Prob(JB):,0.0
Kurtosis:,54.191,Cond. No.,10.0


In [33]:
actual_education_data = actual[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
actual_education_data.dropna(inplace=True)
actual_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=actual_education_data).fit(cov_type="cluster", cov_kwds={'groups': actual_education_data['region']})
actual_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.005
Model:,OLS,Adj. R-squared:,-0.006
Method:,Least Squares,F-statistic:,0.2025
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.892
Time:,21:39:11,Log-Likelihood:,-2882.7
No. Observations:,273,AIC:,5773.0
Df Residuals:,269,BIC:,5788.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.474e+04,2562.420,5.750,0.000,9712.827,1.98e+04
treatment_period,1195.5232,1615.606,0.740,0.459,-1971.007,4362.053
treatment_1,-17.0191,2416.207,-0.007,0.994,-4752.699,4718.660
treatment_period:treatment_1,734.2742,2041.582,0.360,0.719,-3267.153,4735.702

0,1,2,3
Omnibus:,169.927,Durbin-Watson:,0.763
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1243.204
Skew:,2.516,Prob(JB):,1.1e-270
Kurtosis:,12.163,Cond. No.,9.4


In [34]:
actual_education_data = actual[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
actual_education_data.dropna(inplace=True)
actual_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=actual_education_data).fit(cov_type="HC3")
actual_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.466
Model:,OLS,Adj. R-squared:,0.446
Method:,Least Squares,F-statistic:,31.93
Date:,"Wed, 10 May 2023",Prob (F-statistic):,5.6499999999999995e-40
Time:,21:39:14,Log-Likelihood:,-2797.7
No. Observations:,273,AIC:,5617.0
Df Residuals:,262,BIC:,5657.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.452e+04,1030.676,14.091,0.000,1.25e+04,1.65e+04
C(region)[T.Мурманская область],8115.2878,1451.400,5.591,0.000,5270.596,1.1e+04
C(region)[T.Новгородская область],-5756.9344,965.184,-5.965,0.000,-7648.661,-3865.208
C(region)[T.Приморский край],-2932.6000,1112.893,-2.635,0.008,-5113.831,-751.369
C(region)[T.Республика Алтай],8009.3167,1864.224,4.296,0.000,4355.504,1.17e+04
C(region)[T.Республика Марий Эл],-5614.7389,992.536,-5.657,0.000,-7560.073,-3669.404
C(region)[T.Тульская область],-4208.6280,1034.209,-4.069,0.000,-6235.640,-2181.616
C(region)[T.Хабаровский край],1.184e+04,2684.188,4.412,0.000,6582.482,1.71e+04
treatment_period,474.6867,918.306,0.517,0.605,-1325.161,2274.534

0,1,2,3
Omnibus:,165.012,Durbin-Watson:,1.376
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1719.643
Skew:,2.255,Prob(JB):,0.0
Kurtosis:,14.439,Cond. No.,9.93


### Models for initial budgets

In [35]:
initial_roads_data = initial[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
initial_roads_data.dropna(inplace=True)
initial_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=initial_roads_data).fit(cov_type="cluster", cov_kwds={'groups': initial_roads_data['region']})
initial_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.058
Model:,OLS,Adj. R-squared:,0.044
Method:,Least Squares,F-statistic:,10.52
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.00836
Time:,21:39:16,Log-Likelihood:,-1836.5
No. Observations:,215,AIC:,3681.0
Df Residuals:,211,BIC:,3695.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,487.4819,107.062,4.553,0.000,277.644,697.319
treatment_period,603.5022,213.271,2.830,0.005,185.498,1021.506
treatment_1,-8.2726,185.540,-0.045,0.964,-371.924,355.379
treatment_period:treatment_1,109.2637,409.232,0.267,0.789,-692.816,911.344

0,1,2,3
Omnibus:,252.898,Durbin-Watson:,1.861
Prob(Omnibus):,0.0,Jarque-Bera (JB):,11008.941
Skew:,4.881,Prob(JB):,0.0
Kurtosis:,36.669,Cond. No.,9.91


In [36]:
initial_roads_data = initial[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
initial_roads_data.dropna(inplace=True)
initial_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=initial_roads_data).fit(cov_type="HC3")
initial_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.108
Model:,OLS,Adj. R-squared:,0.069
Method:,Least Squares,F-statistic:,3.494
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.000484
Time:,21:39:18,Log-Likelihood:,-1830.7
No. Observations:,215,AIC:,3681.0
Df Residuals:,205,BIC:,3715.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,624.7801,238.055,2.625,0.009,158.201,1091.359
C(region)[T.Мурманская область],483.2807,417.016,1.159,0.246,-334.056,1300.618
C(region)[T.Новгородская область],-507.6385,299.992,-1.692,0.091,-1095.613,80.336
C(region)[T.Приморский край],-11.4657,330.267,-0.035,0.972,-658.776,635.845
C(region)[T.Республика Алтай],-682.4692,382.883,-1.782,0.075,-1432.906,67.968
C(region)[T.Тульская область],-128.5598,329.815,-0.390,0.697,-774.985,517.865
C(region)[T.Хабаровский край],-280.0303,324.966,-0.862,0.389,-916.952,356.891
treatment_period,639.6744,194.511,3.289,0.001,258.440,1020.909
treatment_1,-107.5197,279.101,-0.385,0.700,-654.547,439.508

0,1,2,3
Omnibus:,257.068,Durbin-Watson:,1.971
Prob(Omnibus):,0.0,Jarque-Bera (JB):,11925.94
Skew:,4.99,Prob(JB):,0.0
Kurtosis:,38.095,Cond. No.,10.4


In [37]:
initial_housing_data = initial[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
initial_housing_data.dropna(inplace=True)
initial_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=initial_housing_data).fit(cov_type="cluster", cov_kwds={'groups': initial_housing_data['region']})
initial_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.006
Model:,OLS,Adj. R-squared:,-0.007
Method:,Least Squares,F-statistic:,0.8445
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.518
Time:,21:39:29,Log-Likelihood:,-2383.5
No. Observations:,224,AIC:,4775.0
Df Residuals:,220,BIC:,4789.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,2876.4785,1839.805,1.563,0.118,-729.474,6482.431
treatment_period,749.8515,1204.631,0.622,0.534,-1611.182,3110.885
treatment_1,-2231.8004,1809.920,-1.233,0.218,-5779.178,1315.577
treatment_period:treatment_1,-593.9820,1106.746,-0.537,0.591,-2763.165,1575.201

0,1,2,3
Omnibus:,286.266,Durbin-Watson:,1.064
Prob(Omnibus):,0.0,Jarque-Bera (JB):,13055.74
Skew:,5.717,Prob(JB):,0.0
Kurtosis:,38.611,Cond. No.,10.0


In [38]:
initial_housing_data = initial[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
initial_housing_data.dropna(inplace=True)
initial_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=initial_housing_data).fit(cov_type="HC3")
initial_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.212
Model:,OLS,Adj. R-squared:,0.179
Method:,Least Squares,F-statistic:,9.106
Date:,"Wed, 10 May 2023",Prob (F-statistic):,1.22e-11
Time:,21:39:30,Log-Likelihood:,-2357.4
No. Observations:,224,AIC:,4735.0
Df Residuals:,214,BIC:,4769.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1234.4728,1883.224,0.656,0.512,-2456.578,4925.523
C(region)[T.Мурманская область],625.9192,1429.550,0.438,0.661,-2175.947,3427.786
C(region)[T.Новгородская область],-1469.9622,1439.238,-1.021,0.307,-4290.816,1350.892
C(region)[T.Приморский край],-726.9066,1441.226,-0.504,0.614,-3551.657,2097.844
C(region)[T.Республика Алтай],-701.4560,1345.077,-0.521,0.602,-3337.758,1934.846
C(region)[T.Тульская область],-636.7417,1305.175,-0.488,0.626,-3194.838,1921.354
C(region)[T.Хабаровский край],1.196e+04,3938.851,3.038,0.002,4244.439,1.97e+04
treatment_period,800.8665,1436.752,0.557,0.577,-2015.116,3616.849
treatment_1,-63.7845,932.744,-0.068,0.945,-1891.929,1764.360

0,1,2,3
Omnibus:,261.062,Durbin-Watson:,1.349
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10233.488
Skew:,4.922,Prob(JB):,0.0
Kurtosis:,34.615,Cond. No.,10.5


In [39]:
initial_education_data = initial[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
initial_education_data.dropna(inplace=True)
initial_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=initial_education_data).fit(cov_type="cluster", cov_kwds={'groups': initial_education_data['region']})
initial_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.009
Model:,OLS,Adj. R-squared:,-0.004
Method:,Least Squares,F-statistic:,1.843
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.24
Time:,21:39:31,Log-Likelihood:,-2432.5
No. Observations:,231,AIC:,4873.0
Df Residuals:,227,BIC:,4887.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.278e+04,2694.067,4.744,0.000,7499.834,1.81e+04
treatment_period,1539.7164,966.880,1.592,0.111,-355.334,3434.767
treatment_1,-1392.2926,3012.337,-0.462,0.644,-7296.364,4511.779
treatment_period:treatment_1,291.4952,1131.777,0.258,0.797,-1926.747,2509.737

0,1,2,3
Omnibus:,125.847,Durbin-Watson:,0.713
Prob(Omnibus):,0.0,Jarque-Bera (JB):,703.129
Skew:,2.159,Prob(JB):,2.08e-153
Kurtosis:,10.377,Cond. No.,9.65


In [40]:
initial_education_data = initial[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
initial_education_data.dropna(inplace=True)
initial_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=initial_education_data).fit(cov_type="HC3")
initial_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.445
Model:,OLS,Adj. R-squared:,0.422
Method:,Least Squares,F-statistic:,32.54
Date:,"Wed, 10 May 2023",Prob (F-statistic):,5.97e-36
Time:,21:39:31,Log-Likelihood:,-2365.5
No. Observations:,231,AIC:,4751.0
Df Residuals:,221,BIC:,4785.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,7784.8467,729.229,10.675,0.000,6355.584,9214.109
C(region)[T.Мурманская область],1.33e+04,957.134,13.895,0.000,1.14e+04,1.52e+04
C(region)[T.Новгородская область],-1301.0544,719.946,-1.807,0.071,-2712.123,110.014
C(region)[T.Приморский край],3088.5776,840.975,3.673,0.000,1440.298,4736.857
C(region)[T.Республика Алтай],9938.0377,1711.762,5.806,0.000,6583.046,1.33e+04
C(region)[T.Тульская область],619.8813,908.209,0.683,0.495,-1160.176,2399.938
C(region)[T.Хабаровский край],1.451e+04,2535.922,5.724,0.000,9544.470,1.95e+04
treatment_period,1357.5604,986.261,1.376,0.169,-575.475,3290.596
treatment_1,1376.8959,1978.335,0.696,0.486,-2500.569,5254.361

0,1,2,3
Omnibus:,116.503,Durbin-Watson:,1.246
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1096.666
Skew:,1.735,Prob(JB):,7.2799999999999996e-239
Kurtosis:,13.095,Cond. No.,10.1


### Models for difference

In [41]:
difference_roads_data = difference[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
difference_roads_data.dropna(inplace=True)
difference_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=difference_roads_data).fit(cov_type="cluster", cov_kwds={'groups': difference_roads_data['region']})
difference_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.003
Model:,OLS,Adj. R-squared:,-0.012
Method:,Least Squares,F-statistic:,0.4456
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.729
Time:,21:39:32,Log-Likelihood:,-1751.8
No. Observations:,206,AIC:,3512.0
Df Residuals:,202,BIC:,3525.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,503.6857,180.200,2.795,0.005,150.501,856.871
treatment_period,8.6123,130.801,0.066,0.948,-247.753,264.977
treatment_1,-305.2615,308.993,-0.988,0.323,-910.876,300.353
treatment_period:treatment_1,185.2856,239.314,0.774,0.439,-283.762,654.333

0,1,2,3
Omnibus:,69.478,Durbin-Watson:,1.935
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2491.993
Skew:,-0.417,Prob(JB):,0.0
Kurtosis:,20.019,Cond. No.,9.83


In [42]:
difference_roads_data = difference[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
difference_roads_data.dropna(inplace=True)
difference_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=difference_roads_data).fit(cov_type="HC3")
difference_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.082
Model:,OLS,Adj. R-squared:,0.04
Method:,Least Squares,F-statistic:,2.92
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.00287
Time:,21:39:32,Log-Likelihood:,-1743.3
No. Observations:,206,AIC:,3507.0
Df Residuals:,196,BIC:,3540.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,906.7012,262.770,3.451,0.001,391.681,1421.721
C(region)[T.Мурманская область],-125.3948,369.452,-0.339,0.734,-849.508,598.718
C(region)[T.Новгородская область],-643.4434,246.205,-2.613,0.009,-1125.996,-160.890
C(region)[T.Приморский край],-772.9877,291.701,-2.650,0.008,-1344.711,-201.264
C(region)[T.Республика Алтай],130.9403,294.112,0.445,0.656,-445.508,707.389
C(region)[T.Тульская область],39.6173,376.493,0.105,0.916,-698.295,777.529
C(region)[T.Хабаровский край],-148.4069,312.358,-0.475,0.635,-760.618,463.804
treatment_period,-109.0207,195.011,-0.559,0.576,-491.235,273.194
treatment_1,-428.8296,336.146,-1.276,0.202,-1087.663,230.004

0,1,2,3
Omnibus:,69.534,Durbin-Watson:,2.095
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2135.423
Skew:,-0.498,Prob(JB):,0.0
Kurtosis:,18.742,Cond. No.,10.4


In [43]:
difference_housing_data = difference[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
difference_housing_data.dropna(inplace=True)
difference_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=difference_housing_data).fit(cov_type="cluster", cov_kwds={'groups': difference_housing_data['region']})
difference_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.019
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,3.757
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.0788
Time:,21:39:33,Log-Likelihood:,-2144.9
No. Observations:,213,AIC:,4298.0
Df Residuals:,209,BIC:,4311.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,778.2006,645.856,1.205,0.228,-487.654,2044.055
treatment_period,1625.6255,774.176,2.100,0.036,108.268,3142.983
treatment_1,56.7152,657.813,0.086,0.931,-1232.574,1346.004
treatment_period:treatment_1,-1411.3020,813.412,-1.735,0.083,-3005.561,182.957

0,1,2,3
Omnibus:,87.985,Durbin-Watson:,2.073
Prob(Omnibus):,0.0,Jarque-Bera (JB):,12057.968
Skew:,0.125,Prob(JB):,0.0
Kurtosis:,39.859,Cond. No.,9.94


In [44]:
difference_housing_data = difference[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
difference_housing_data.dropna(inplace=True)
difference_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=difference_housing_data).fit(cov_type="HC3")
difference_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.019
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,2.0
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.115
Time:,21:39:33,Log-Likelihood:,-2144.9
No. Observations:,213,AIC:,4298.0
Df Residuals:,209,BIC:,4311.0
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,778.2006,634.859,1.226,0.220,-466.099,2022.501
treatment_period,1625.6255,861.271,1.887,0.059,-62.434,3313.685
treatment_1,56.7152,708.969,0.080,0.936,-1332.838,1446.268
treatment_period:treatment_1,-1411.3020,986.417,-1.431,0.153,-3344.644,522.040

0,1,2,3
Omnibus:,87.985,Durbin-Watson:,2.073
Prob(Omnibus):,0.0,Jarque-Bera (JB):,12057.968
Skew:,0.125,Prob(JB):,0.0
Kurtosis:,39.859,Cond. No.,9.94


In [45]:
difference_education_data = difference[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
difference_education_data.dropna(inplace=True)
difference_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=difference_education_data).fit(cov_type="cluster", cov_kwds={'groups': difference_education_data['region']})
difference_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.004
Model:,OLS,Adj. R-squared:,-0.01
Method:,Least Squares,F-statistic:,0.6536
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.609
Time:,21:39:34,Log-Likelihood:,-2151.0
No. Observations:,219,AIC:,4310.0
Df Residuals:,215,BIC:,4324.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,2776.4708,1370.970,2.025,0.043,89.420,5463.522
treatment_period,-394.2925,1408.834,-0.280,0.780,-3155.557,2366.972
treatment_1,516.0569,812.960,0.635,0.526,-1077.315,2109.429
treatment_period:treatment_1,455.4175,980.599,0.464,0.642,-1466.522,2377.357

0,1,2,3
Omnibus:,101.783,Durbin-Watson:,1.507
Prob(Omnibus):,0.0,Jarque-Bera (JB):,421.691
Skew:,1.884,Prob(JB):,2.7e-92
Kurtosis:,8.658,Cond. No.,9.61


In [46]:
difference_education_data = difference[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
difference_education_data.dropna(inplace=True)
difference_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=difference_education_data).fit(cov_type="HC3")
difference_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.184
Model:,OLS,Adj. R-squared:,0.148
Method:,Least Squares,F-statistic:,6.027
Date:,"Wed, 10 May 2023",Prob (F-statistic):,1.65e-07
Time:,21:39:35,Log-Likelihood:,-2129.2
No. Observations:,219,AIC:,4278.0
Df Residuals:,209,BIC:,4312.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,5867.0784,1016.347,5.773,0.000,3875.076,7859.081
C(region)[T.Мурманская область],-4369.5652,988.350,-4.421,0.000,-6306.695,-2432.435
C(region)[T.Новгородская область],-3708.9147,1068.013,-3.473,0.001,-5802.181,-1615.649
C(region)[T.Приморский край],-5299.9637,1017.975,-5.206,0.000,-7295.157,-3304.770
C(region)[T.Республика Алтай],-689.4448,1344.132,-0.513,0.608,-3323.895,1945.005
C(region)[T.Тульская область],-4128.2424,1060.542,-3.893,0.000,-6206.866,-2049.619
C(region)[T.Хабаровский край],-1895.7075,1434.244,-1.322,0.186,-4706.773,915.358
treatment_period,-597.6992,603.548,-0.990,0.322,-1780.632,585.234
treatment_1,876.9088,1537.484,0.570,0.568,-2136.505,3890.322

0,1,2,3
Omnibus:,98.498,Durbin-Watson:,1.809
Prob(Omnibus):,0.0,Jarque-Bera (JB):,530.572
Skew:,1.69,Prob(JB):,6.14e-116
Kurtosis:,9.835,Cond. No.,10.1


# Lagged models

# t+1

In [47]:
with open("D:/DZ/Course_6/Diploma/Data/full_ind_data.csv", "rb") as inp:
    full_ind_data = pickle.load(inp)

In [49]:
with open("D:/DZ/Course_6/Diploma/Data/mun_data_50.csv", "rb") as inp:
    mun_data = pd.read_csv(inp, index_col=0)

In [50]:
mun_data.reset_index(inplace=True, drop=True)

In [51]:
full_ind_data.shape

(1386, 14)

In [52]:
full_ind_data.head()

Unnamed: 0,raion,region,treatment_1,treatment_2,year,bad_roads,bad_schools,unfinished_construction,housing,population,lack_transportation,heat_grid_fixed,water_grid_fixed,sewage_grid_fixed
0,Владивостокский,Приморский край,1,1,2013,,,3400.0,4.05,625868.0,,0.05024,0.00447,0.001211
1,Владивостокский,Приморский край,1,1,2014,,,3400.0,3.88,630027.0,,0.02067,0.008552,0.004417
2,Владивостокский,Приморский край,1,1,2015,,0.0,21529.93,5.0,631387.0,,0.019753,0.009448,0.003201
3,Владивостокский,Приморский край,1,1,2016,,0.0,2411.41,5.17,633167.0,,0.011566,0.006601,0.001074
4,Владивостокский,Приморский край,1,1,2017,,0.0,,3.19,633414.0,,0.013158,0.003128,0.003201


In [53]:
full_in_data = pd.merge(full_ind_data, mun_data, on=["raion", "region"], how="left")

In [54]:
full_ind_data = full_in_data.drop(columns=["treatment_1_x", "treatment_2_x"]).rename(columns={"treatment_1_y": "treatment_1", "treatment_2_y": "treatment_2"})

In [55]:
two_time_points = pd.DataFrame()
for i in mapping.keys():
    two_time_points = two_time_points.append(full_ind_data.loc[(full_ind_data.region == i) & ((full_ind_data.year == mapping[i]) | (full_ind_data.year == mapping[i] + 6))])

two_time_points.reset_index(inplace=True, drop=True)

two_time_points["treatment_period"] = [1 if i > 2018 else 0 for i in two_time_points.year]

In [56]:
treat_1_roads = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_roads"]].copy()
treat_1_roads.dropna(inplace=True)

roads = sm_f.ols(formula="bad_roads ~ treatment_period*treatment_1", data=treat_1_roads).fit(cov_type="cluster", cov_kwds={'groups': treat_1_roads['region']})
roads.summary()

0,1,2,3
Dep. Variable:,bad_roads,R-squared:,0.007
Model:,OLS,Adj. R-squared:,-0.01
Method:,Least Squares,F-statistic:,0.8938
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.497
Time:,21:40:12,Log-Likelihood:,-852.04
No. Observations:,182,AIC:,1712.0
Df Residuals:,178,BIC:,1725.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,56.8545,6.139,9.261,0.000,44.821,68.888
treatment_period,-2.7784,3.571,-0.778,0.437,-9.777,4.221
treatment_1,6.1475,5.632,1.092,0.275,-4.890,17.185
treatment_period:treatment_1,-2.0569,4.066,-0.506,0.613,-10.026,5.912

0,1,2,3
Omnibus:,22.033,Durbin-Watson:,1.065
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8.62
Skew:,-0.284,Prob(JB):,0.0134
Kurtosis:,2.098,Cond. No.,8.34


In [57]:
treat_1_roads = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_roads"]].copy()
treat_1_roads.dropna(inplace=True)

roads = sm_f.ols(formula="bad_roads ~ treatment_period*treatment_1 + C(region)", data=treat_1_roads).fit(cov_type="HC3")
roads.summary()

0,1,2,3
Dep. Variable:,bad_roads,R-squared:,0.354
Model:,OLS,Adj. R-squared:,0.32
Method:,Least Squares,F-statistic:,9.894
Date:,"Wed, 10 May 2023",Prob (F-statistic):,3.64e-12
Time:,21:40:16,Log-Likelihood:,-812.87
No. Observations:,182,AIC:,1646.0
Df Residuals:,172,BIC:,1678.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,56.4393,4.123,13.689,0.000,48.358,64.520
C(region)[T.Мурманская область],-23.4805,9.685,-2.425,0.015,-42.462,-4.499
C(region)[T.Новгородская область],-1.4277,6.533,-0.219,0.827,-14.232,11.377
C(region)[T.Республика Алтай],22.1335,6.120,3.617,0.000,10.139,34.128
C(region)[T.Республика Марий Эл],20.1486,4.556,4.422,0.000,11.219,29.078
C(region)[T.Тульская область],-6.6543,5.038,-1.321,0.187,-16.529,3.221
C(region)[T.Хабаровский край],-23.1213,6.247,-3.701,0.000,-35.366,-10.877
treatment_period,-3.2920,3.893,-0.846,0.398,-10.922,4.338
treatment_1,8.0178,6.876,1.166,0.244,-5.459,21.495

0,1,2,3
Omnibus:,1.66,Durbin-Watson:,1.62
Prob(Omnibus):,0.436,Jarque-Bera (JB):,1.268
Skew:,-0.145,Prob(JB):,0.531
Kurtosis:,3.287,Cond. No.,8.89


In [58]:
treat_1_schools = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_schools"]].copy()
treat_1_schools.dropna(inplace=True)

schools = sm_f.ols(formula="bad_schools ~ treatment_period*treatment_1", data=treat_1_schools).fit(cov_type="cluster", cov_kwds={'groups': treat_1_schools['region']})
schools.summary()

  return np.sqrt(eigvals[0]/eigvals[-1])


0,1,2,3
Dep. Variable:,bad_schools,R-squared:,0.074
Model:,OLS,Adj. R-squared:,0.057
Method:,Least Squares,F-statistic:,17.46
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.00315
Time:,21:40:18,Log-Likelihood:,-434.22
No. Observations:,114,AIC:,874.4
Df Residuals:,111,BIC:,882.6
Df Model:,2,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3.1609,1.408,2.244,0.025,0.401,5.921
treatment_period,9.6930,1.642,5.902,0.000,6.474,12.912
treatment_1,2.6502,3.184,0.832,0.405,-3.590,8.890
treatment_period:treatment_1,0,0,,,0,0

0,1,2,3
Omnibus:,83.976,Durbin-Watson:,2.071
Prob(Omnibus):,0.0,Jarque-Bera (JB):,401.308
Skew:,2.717,Prob(JB):,7.19e-88
Kurtosis:,10.413,Cond. No.,inf


In [59]:
treat_1_schools = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_schools"]].copy()
treat_1_schools.dropna(inplace=True)

schools = sm_f.ols(formula="bad_schools ~ treatment_period*treatment_1 + C(region)", data=treat_1_schools).fit(cov_type="HC3")
schools.summary()

  return np.sqrt(eigvals[0]/eigvals[-1])


0,1,2,3
Dep. Variable:,bad_schools,R-squared:,0.261
Model:,OLS,Adj. R-squared:,0.205
Method:,Least Squares,F-statistic:,2.396
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.0205
Time:,21:40:19,Log-Likelihood:,-421.34
No. Observations:,114,AIC:,860.7
Df Residuals:,105,BIC:,885.3
Df Model:,8,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,28.8500,29.911,0.965,0.335,-29.774,87.474
C(region)[T.Новгородская область],-26.8579,29.933,-0.897,0.370,-85.526,31.810
C(region)[T.Приморский край],-29.0549,29.912,-0.971,0.331,-87.681,29.571
C(region)[T.Республика Алтай],-24.7709,30.107,-0.823,0.411,-83.780,34.239
C(region)[T.Республика Марий Эл],-26.2114,29.983,-0.874,0.382,-84.977,32.554
C(region)[T.Тульская область],-24.7668,29.977,-0.826,0.409,-83.521,33.987
C(region)[T.Хабаровский край],-7.1833,34.597,-0.208,0.836,-74.993,60.626
treatment_period,8.9964,5.707,1.576,0.115,-2.189,20.182
treatment_1,3.0737,3.650,0.842,0.400,-4.080,10.228

0,1,2,3
Omnibus:,59.517,Durbin-Watson:,2.297
Prob(Omnibus):,0.0,Jarque-Bera (JB):,184.391
Skew:,1.964,Prob(JB):,9.12e-41
Kurtosis:,7.837,Cond. No.,inf


In [60]:
treat_1_sewerage = two_time_points[["raion", "region", "treatment_1", "treatment_period", "sewage_grid_fixed"]].copy()
treat_1_sewerage.dropna(inplace=True)

sewerage = sm_f.ols(formula="sewage_grid_fixed ~ treatment_period*treatment_1", data=treat_1_sewerage).fit(cov_type="cluster", cov_kwds={'groups': treat_1_sewerage['region']})
sewerage.summary()

0,1,2,3
Dep. Variable:,sewage_grid_fixed,R-squared:,0.079
Model:,OLS,Adj. R-squared:,0.032
Method:,Least Squares,F-statistic:,6.625
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.0341
Time:,21:40:20,Log-Likelihood:,114.07
No. Observations:,63,AIC:,-220.1
Df Residuals:,59,BIC:,-211.6
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0164,0.005,3.361,0.001,0.007,0.026
treatment_period,-0.0042,0.006,-0.688,0.492,-0.016,0.008
treatment_1,0.0519,0.032,1.606,0.108,-0.011,0.115
treatment_period:treatment_1,-0.0610,0.033,-1.877,0.061,-0.125,0.003

0,1,2,3
Omnibus:,83.734,Durbin-Watson:,2.418
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1078.141
Skew:,3.857,Prob(JB):,7.67e-235
Kurtosis:,21.741,Cond. No.,10.3


In [61]:
treat_1_sewerage = two_time_points[["raion", "region", "treatment_1", "treatment_period", "sewage_grid_fixed"]].copy()
treat_1_sewerage.dropna(inplace=True)

sewerage = sm_f.ols(formula="sewage_grid_fixed ~ treatment_period*treatment_1 + C(region)", data=treat_1_sewerage).fit(cov_type="HC3")
sewerage.summary()

0,1,2,3
Dep. Variable:,sewage_grid_fixed,R-squared:,0.135
Model:,OLS,Adj. R-squared:,0.007
Method:,Least Squares,F-statistic:,0.8968
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.526
Time:,21:40:20,Log-Likelihood:,116.06
No. Observations:,63,AIC:,-214.1
Df Residuals:,54,BIC:,-194.8
Df Model:,8,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0049,0.004,1.222,0.222,-0.003,0.013
C(region)[T.Мурманская область],-0.0007,0.005,-0.137,0.891,-0.010,0.009
C(region)[T.Новгородская область],0.0117,0.013,0.892,0.373,-0.014,0.037
C(region)[T.Приморский край],0.0307,0.026,1.184,0.236,-0.020,0.082
C(region)[T.Тульская область],-0.0044,0.050,-0.088,0.930,-0.102,0.094
C(region)[T.Хабаровский край],0.0064,0.004,1.475,0.140,-0.002,0.015
treatment_period,0.0024,0.005,0.445,0.656,-0.008,0.013
treatment_1,0.0389,0.081,0.481,0.630,-0.120,0.197
treatment_period:treatment_1,-0.0433,0.115,-0.376,0.707,-0.269,0.182

0,1,2,3
Omnibus:,73.04,Durbin-Watson:,2.497
Prob(Omnibus):,0.0,Jarque-Bera (JB):,699.333
Skew:,3.275,Prob(JB):,1.39e-152
Kurtosis:,17.951,Cond. No.,12.3


In [62]:
treat_1_heat = two_time_points[["raion", "region", "treatment_1", "treatment_period", "heat_grid_fixed"]].copy()
treat_1_heat.dropna(inplace=True)

heat = sm_f.ols(formula="heat_grid_fixed ~ treatment_period*treatment_1", data=treat_1_heat).fit(cov_type="HC3") # т.к. в выборке только один регион
heat.summary()

  return np.sqrt(eigvals[0]/eigvals[-1])


0,1,2,3
Dep. Variable:,heat_grid_fixed,R-squared:,0.013
Model:,OLS,Adj. R-squared:,-0.022
Method:,Least Squares,F-statistic:,0.06533
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.8
Time:,21:40:21,Log-Likelihood:,79.682
No. Observations:,30,AIC:,-155.4
Df Residuals:,28,BIC:,-152.6
Df Model:,1,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0240,0.003,7.359,0.000,0.018,0.030
treatment_period,0,0,,,0,0
treatment_1,0.0077,0.030,0.256,0.798,-0.052,0.067
treatment_period:treatment_1,0,0,,,0,0

0,1,2,3
Omnibus:,25.292,Durbin-Watson:,1.901
Prob(Omnibus):,0.0,Jarque-Bera (JB):,45.327
Skew:,1.912,Prob(JB):,1.44e-10
Kurtosis:,7.651,Cond. No.,inf


In [63]:
treat_1_water = two_time_points[["raion", "region", "treatment_1", "treatment_period", "water_grid_fixed"]].copy()
treat_1_water.dropna(inplace=True)

water = sm_f.ols(formula="water_grid_fixed ~ treatment_period*treatment_1", data=treat_1_water).fit(cov_type="cluster", cov_kwds={'groups': treat_1_water['region']})
water.summary()

0,1,2,3
Dep. Variable:,water_grid_fixed,R-squared:,0.006
Model:,OLS,Adj. R-squared:,-0.019
Method:,Least Squares,F-statistic:,16.8
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.00482
Time:,21:40:21,Log-Likelihood:,148.59
No. Observations:,123,AIC:,-289.2
Df Residuals:,119,BIC:,-277.9
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0272,0.011,2.378,0.017,0.005,0.050
treatment_period,0.0073,0.012,0.598,0.550,-0.017,0.031
treatment_1,-0.0102,0.017,-0.583,0.560,-0.044,0.024
treatment_period:treatment_1,-0.0191,0.018,-1.086,0.277,-0.054,0.015

0,1,2,3
Omnibus:,204.495,Durbin-Watson:,2.049
Prob(Omnibus):,0.0,Jarque-Bera (JB):,16829.189
Skew:,6.941,Prob(JB):,0.0
Kurtosis:,58.597,Cond. No.,9.19


In [64]:
treat_1_water = two_time_points[["raion", "region", "treatment_1", "treatment_period", "water_grid_fixed"]].copy()
treat_1_water.dropna(inplace=True)

water = sm_f.ols(formula="water_grid_fixed ~ treatment_period*treatment_1 + C(region)", data=treat_1_water).fit(cov_type="HC3")
water.summary()

0,1,2,3
Dep. Variable:,water_grid_fixed,R-squared:,0.091
Model:,OLS,Adj. R-squared:,0.027
Method:,Least Squares,F-statistic:,4.445
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.000104
Time:,21:40:22,Log-Likelihood:,154.09
No. Observations:,123,AIC:,-290.2
Df Residuals:,114,BIC:,-264.9
Df Model:,8,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0177,0.008,2.227,0.026,0.002,0.033
C(region)[T.Мурманская область],0.0698,0.064,1.087,0.277,-0.056,0.196
C(region)[T.Новгородская область],-0.0122,0.008,-1.603,0.109,-0.027,0.003
C(region)[T.Приморский край],0.0023,0.009,0.258,0.796,-0.015,0.020
C(region)[T.Республика Алтай],-0.0127,0.019,-0.672,0.502,-0.050,0.024
C(region)[T.Хабаровский край],0.0150,0.016,0.958,0.338,-0.016,0.046
treatment_period,0.0127,0.016,0.789,0.430,-0.019,0.044
treatment_1,-0.0110,0.023,-0.485,0.628,-0.055,0.033
treatment_period:treatment_1,-0.0142,0.029,-0.495,0.620,-0.070,0.042

0,1,2,3
Omnibus:,188.501,Durbin-Watson:,2.092
Prob(Omnibus):,0.0,Jarque-Bera (JB):,11622.364
Skew:,6.095,Prob(JB):,0.0
Kurtosis:,49.034,Cond. No.,9.86


# Fin t+1

In [65]:
with open("D:/DZ/Course_6/Diploma/Data/financial_data.csv", "rb") as inp:
    temp_fin = pickle.load(inp)

In [66]:
with open("D:/DZ/Course_6/Diploma/Data/mun_data_50.csv", "rb") as inp:
    data_for_models = pd.read_csv(inp, index_col=0).reset_index(drop=True)

In [67]:
mapping = {"Приморский край": 2016, "Мурманская область": 2016, "Костромская область": 2015, "Республика Алтай": 2014, "Республика Марий Эл": 2014, "Хабаровский край": 2014, 
           "Тульская область": 2014, "Новгородская область": 2016}

In [68]:
actual, initial, difference = temp_fin.loc[temp_fin.budget_type == "actual_spending"], temp_fin.loc[temp_fin.budget_type == "initial_budget"], temp_fin.loc[temp_fin.budget_type == "разница"]

In [69]:
actual = pd.pivot(actual, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()
initial = pd.pivot(initial, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()
difference = pd.pivot(difference, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()

In [70]:
for i in actual, initial, difference:
    i.rename(columns={"дорожное хозяйство (дорожные фонды)": "roads", "жилищно-коммунальное хозяйство": "housing", "образование": "education"}, inplace=True)

In [71]:
actual.head()

sub_indicator,region,raion,year,roads,housing,education
0,Костромская область,Антроповский район,2013,666.115702,1610.368144,12882.945154
1,Костромская область,Антроповский район,2014,603.232937,1922.79625,12638.11448
2,Костромская область,Антроповский район,2015,688.90871,95.255228,21901.154847
3,Костромская область,Антроповский район,2016,764.043811,203.425719,10986.12319
4,Костромская область,Антроповский район,2017,,,


In [72]:
frames = [actual, initial, difference]

In [73]:
for i in range(len(frames)):
    temp_fin_data = pd.DataFrame()
    for j in mapping.keys():
        temp_fin_data = temp_fin_data.append(frames[i].loc[(frames[i].region == j) & ((frames[i].year == mapping[j]) | (frames[i].year == mapping[j] + 6))])
    temp_fin_data["treatment_period"] = [1 if i > 2018 else 0 for i in temp_fin_data.year]
    frames[i] = pd.merge(temp_fin_data, data_for_models, on=["region", "raion"], how="inner")

In [74]:
#frames[0]

In [75]:
actual, initial, difference = frames

### Models for actual budgets

In [76]:
actual_roads_data = actual[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
actual_roads_data.dropna(inplace=True)
actual_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=actual_roads_data).fit(cov_type="cluster", cov_kwds={'groups': actual_roads_data['region']})
actual_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.047
Model:,OLS,Adj. R-squared:,0.032
Method:,Least Squares,F-statistic:,41.99
Date:,"Wed, 10 May 2023",Prob (F-statistic):,7.63e-05
Time:,21:40:45,Log-Likelihood:,-1759.6
No. Observations:,203,AIC:,3527.0
Df Residuals:,199,BIC:,3541.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1089.3436,244.511,4.455,0.000,610.110,1568.577
treatment_period,604.3688,353.614,1.709,0.087,-88.703,1297.440
treatment_1,-407.7288,236.857,-1.721,0.085,-871.961,56.503
treatment_period:treatment_1,-895.0718,324.736,-2.756,0.006,-1531.544,-258.600

0,1,2,3
Omnibus:,96.19,Durbin-Watson:,1.96
Prob(Omnibus):,0.0,Jarque-Bera (JB):,316.475
Skew:,2.031,Prob(JB):,1.9e-69
Kurtosis:,7.574,Cond. No.,11.8


In [77]:
actual_roads_data = actual[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
actual_roads_data.dropna(inplace=True)
actual_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=actual_roads_data).fit(cov_type="HC3")
actual_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.166
Model:,OLS,Adj. R-squared:,0.122
Method:,Least Squares,F-statistic:,5.361
Date:,"Wed, 10 May 2023",Prob (F-statistic):,5.61e-07
Time:,21:40:47,Log-Likelihood:,-1746.1
No. Observations:,203,AIC:,3514.0
Df Residuals:,192,BIC:,3551.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1322.4275,271.889,4.864,0.000,789.535,1855.320
C(region)[T.Мурманская область],1215.8377,913.466,1.331,0.183,-574.522,3006.197
C(region)[T.Новгородская область],-872.9516,288.927,-3.021,0.003,-1439.237,-306.666
C(region)[T.Приморский край],-692.5982,283.194,-2.446,0.014,-1247.648,-137.548
C(region)[T.Республика Алтай],540.0342,471.018,1.147,0.252,-383.143,1463.212
C(region)[T.Республика Марий Эл],-441.9244,298.147,-1.482,0.138,-1026.282,142.433
C(region)[T.Тульская область],-366.9046,323.625,-1.134,0.257,-1001.198,267.389
C(region)[T.Хабаровский край],239.7210,397.380,0.603,0.546,-539.129,1018.571
treatment_period,311.8889,291.185,1.071,0.284,-258.823,882.601

0,1,2,3
Omnibus:,81.917,Durbin-Watson:,2.134
Prob(Omnibus):,0.0,Jarque-Bera (JB):,251.393
Skew:,1.712,Prob(JB):,2.5799999999999997e-55
Kurtosis:,7.242,Cond. No.,12.6


In [78]:
actual_housing_data = actual[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
actual_housing_data.dropna(inplace=True)
actual_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=actual_housing_data).fit(cov_type="cluster", cov_kwds={'groups': actual_housing_data['region']})
actual_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.038
Model:,OLS,Adj. R-squared:,0.024
Method:,Least Squares,F-statistic:,1.087
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.415
Time:,21:40:47,Log-Likelihood:,-2161.8
No. Observations:,202,AIC:,4332.0
Df Residuals:,198,BIC:,4345.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3346.3044,1889.498,1.771,0.077,-357.043,7049.652
treatment_period,4436.7593,2545.766,1.743,0.081,-552.850,9426.369
treatment_1,-1973.7271,1818.709,-1.085,0.278,-5538.330,1590.876
treatment_period:treatment_1,-4202.7956,2708.295,-1.552,0.121,-9510.956,1105.364

0,1,2,3
Omnibus:,271.53,Durbin-Watson:,1.08
Prob(Omnibus):,0.0,Jarque-Bera (JB):,16463.958
Skew:,5.891,Prob(JB):,0.0
Kurtosis:,45.63,Cond. No.,11.9


In [79]:
actual_housing_data = actual[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
actual_housing_data.dropna(inplace=True)
actual_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=actual_housing_data).fit(cov_type="HC3")
actual_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.256
Model:,OLS,Adj. R-squared:,0.217
Method:,Least Squares,F-statistic:,6.072
Date:,"Wed, 10 May 2023",Prob (F-statistic):,5.29e-08
Time:,21:40:47,Log-Likelihood:,-2135.9
No. Observations:,202,AIC:,4294.0
Df Residuals:,191,BIC:,4330.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1806.5236,984.808,1.834,0.067,-123.664,3736.711
C(region)[T.Мурманская область],1449.2830,1236.637,1.172,0.241,-974.480,3873.046
C(region)[T.Новгородская область],-1234.5390,1026.735,-1.202,0.229,-3246.902,777.824
C(region)[T.Приморский край],-1038.3675,969.100,-1.071,0.284,-2937.768,861.033
C(region)[T.Республика Алтай],9.9499,936.944,0.011,0.992,-1826.426,1846.325
C(region)[T.Республика Марий Эл],-849.8184,729.045,-1.166,0.244,-2278.720,579.083
C(region)[T.Тульская область],1642.5313,1271.136,1.292,0.196,-848.849,4133.911
C(region)[T.Хабаровский край],1.296e+04,3492.881,3.710,0.000,6112.956,1.98e+04
treatment_period,1963.9366,2254.778,0.871,0.384,-2455.347,6383.220

0,1,2,3
Omnibus:,266.226,Durbin-Watson:,1.366
Prob(Omnibus):,0.0,Jarque-Bera (JB):,18237.117
Skew:,5.605,Prob(JB):,0.0
Kurtosis:,48.179,Cond. No.,12.6


In [80]:
actual_education_data = actual[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
actual_education_data.dropna(inplace=True)
actual_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=actual_education_data).fit(cov_type="cluster", cov_kwds={'groups': actual_education_data['region']})
actual_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.036
Model:,OLS,Adj. R-squared:,0.021
Method:,Least Squares,F-statistic:,5.622
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.0279
Time:,21:40:48,Log-Likelihood:,-2162.8
No. Observations:,204,AIC:,4334.0
Df Residuals:,200,BIC:,4347.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.474e+04,2567.291,5.740,0.000,9703.281,1.98e+04
treatment_period,4019.2697,3324.911,1.209,0.227,-2497.436,1.05e+04
treatment_1,-17.0191,2420.800,-0.007,0.994,-4761.700,4727.662
treatment_period:treatment_1,-8100.3983,2792.565,-2.901,0.004,-1.36e+04,-2627.072

0,1,2,3
Omnibus:,135.388,Durbin-Watson:,0.803
Prob(Omnibus):,0.0,Jarque-Bera (JB):,957.754
Skew:,2.591,Prob(JB):,1.0600000000000002e-208
Kurtosis:,12.265,Cond. No.,11.9


In [81]:
actual_education_data = actual[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
actual_education_data.dropna(inplace=True)
actual_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=actual_education_data).fit(cov_type="HC3")
actual_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.036
Model:,OLS,Adj. R-squared:,0.021
Method:,Least Squares,F-statistic:,8.051
Date:,"Wed, 10 May 2023",Prob (F-statistic):,4.32e-05
Time:,21:40:48,Log-Likelihood:,-2162.8
No. Observations:,204,AIC:,4334.0
Df Residuals:,200,BIC:,4347.0
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.474e+04,804.240,18.322,0.000,1.32e+04,1.63e+04
treatment_period,4019.2697,1768.328,2.273,0.023,553.411,7485.129
treatment_1,-17.0191,1698.018,-0.010,0.992,-3345.074,3311.036
treatment_period:treatment_1,-8100.3983,2482.321,-3.263,0.001,-1.3e+04,-3235.138

0,1,2,3
Omnibus:,135.388,Durbin-Watson:,0.803
Prob(Omnibus):,0.0,Jarque-Bera (JB):,957.754
Skew:,2.591,Prob(JB):,1.0600000000000002e-208
Kurtosis:,12.265,Cond. No.,11.9


### Models for initial budgets

In [82]:
initial_roads_data = initial[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
initial_roads_data.dropna(inplace=True)
initial_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=initial_roads_data).fit(cov_type="cluster", cov_kwds={'groups': initial_roads_data['region']})
initial_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.161
Model:,OLS,Adj. R-squared:,0.146
Method:,Least Squares,F-statistic:,57.55
Date:,"Wed, 10 May 2023",Prob (F-statistic):,8.18e-05
Time:,21:40:49,Log-Likelihood:,-1359.8
No. Observations:,166,AIC:,2728.0
Df Residuals:,162,BIC:,2740.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,487.4819,107.289,4.544,0.000,277.200,697.764
treatment_period,634.6159,169.236,3.750,0.000,302.919,966.313
treatment_1,-8.2726,185.933,-0.044,0.965,-372.694,356.149
treatment_period:treatment_1,787.5434,215.310,3.658,0.000,365.543,1209.544

0,1,2,3
Omnibus:,101.641,Durbin-Watson:,1.73
Prob(Omnibus):,0.0,Jarque-Bera (JB):,587.196
Skew:,2.294,Prob(JB):,3.11e-128
Kurtosis:,10.991,Cond. No.,8.72


In [83]:
initial_roads_data = initial[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
initial_roads_data.dropna(inplace=True)
initial_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=initial_roads_data).fit(cov_type="HC3")
initial_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.23
Model:,OLS,Adj. R-squared:,0.186
Method:,Least Squares,F-statistic:,5.506
Date:,"Wed, 10 May 2023",Prob (F-statistic):,1.43e-06
Time:,21:40:49,Log-Likelihood:,-1352.7
No. Observations:,166,AIC:,2725.0
Df Residuals:,156,BIC:,2757.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,399.2484,125.194,3.189,0.001,153.873,644.624
C(region)[T.Мурманская область],906.3962,575.817,1.574,0.115,-222.183,2034.976
C(region)[T.Новгородская область],-162.1613,133.774,-1.212,0.225,-424.354,100.032
C(region)[T.Приморский край],101.5115,161.690,0.628,0.530,-215.396,418.419
C(region)[T.Республика Алтай],-566.8737,242.100,-2.341,0.019,-1041.380,-92.367
C(region)[T.Тульская область],29.1066,278.919,0.104,0.917,-517.564,575.778
C(region)[T.Хабаровский край],122.2823,231.065,0.529,0.597,-330.597,575.161
treatment_period,783.6876,196.400,3.990,0.000,398.751,1168.624
treatment_1,-45.7230,296.614,-0.154,0.877,-627.076,535.630

0,1,2,3
Omnibus:,88.391,Durbin-Watson:,1.895
Prob(Omnibus):,0.0,Jarque-Bera (JB):,439.844
Skew:,1.983,Prob(JB):,3.08e-96
Kurtosis:,9.919,Cond. No.,9.21


In [84]:
initial_housing_data = initial[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
initial_housing_data.dropna(inplace=True)
initial_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=initial_housing_data).fit(cov_type="cluster", cov_kwds={'groups': initial_housing_data['region']})
initial_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.025
Model:,OLS,Adj. R-squared:,0.008
Method:,Least Squares,F-statistic:,0.6655
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.603
Time:,21:40:49,Log-Likelihood:,-1871.9
No. Observations:,174,AIC:,3752.0
Df Residuals:,170,BIC:,3765.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,2876.4785,1843.442,1.560,0.119,-736.601,6489.558
treatment_period,3295.2313,3080.120,1.070,0.285,-2741.694,9332.156
treatment_1,-2231.8004,1813.497,-1.231,0.218,-5786.189,1322.588
treatment_period:treatment_1,-3286.6800,2947.754,-1.115,0.265,-9064.171,2490.811

0,1,2,3
Omnibus:,208.531,Durbin-Watson:,1.168
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5927.678
Skew:,4.946,Prob(JB):,0.0
Kurtosis:,29.828,Cond. No.,8.83


In [85]:
initial_housing_data = initial[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
initial_housing_data.dropna(inplace=True)
initial_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=initial_housing_data).fit(cov_type="HC3")
initial_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.197
Model:,OLS,Adj. R-squared:,0.153
Method:,Least Squares,F-statistic:,8.861
Date:,"Wed, 10 May 2023",Prob (F-statistic):,7.96e-11
Time:,21:40:50,Log-Likelihood:,-1855.0
No. Observations:,174,AIC:,3730.0
Df Residuals:,164,BIC:,3762.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1311.9198,2637.837,0.497,0.619,-3858.145,6481.984
C(region)[T.Мурманская область],433.7930,2588.979,0.168,0.867,-4640.512,5508.098
C(region)[T.Новгородская область],-1241.1649,2597.737,-0.478,0.633,-6332.636,3850.306
C(region)[T.Приморский край],-797.1361,2576.553,-0.309,0.757,-5847.086,4252.814
C(region)[T.Республика Алтай],-886.9908,1444.911,-0.614,0.539,-3718.964,1944.983
C(region)[T.Тульская область],-442.0950,1438.360,-0.307,0.759,-3261.229,2377.039
C(region)[T.Хабаровский край],1.146e+04,3973.324,2.883,0.004,3667.639,1.92e+04
treatment_period,1461.2359,2841.790,0.514,0.607,-4108.570,7031.042
treatment_1,-201.2506,999.337,-0.201,0.840,-2159.916,1757.415

0,1,2,3
Omnibus:,193.903,Durbin-Watson:,1.412
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5008.648
Skew:,4.414,Prob(JB):,0.0
Kurtosis:,27.757,Cond. No.,9.28


In [86]:
initial_education_data = initial[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
initial_education_data.dropna(inplace=True)
initial_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=initial_education_data).fit(cov_type="cluster", cov_kwds={'groups': initial_education_data['region']})
initial_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.018
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,0.5023
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.695
Time:,21:40:50,Log-Likelihood:,-1912.3
No. Observations:,181,AIC:,3833.0
Df Residuals:,177,BIC:,3845.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.278e+04,2699.025,4.735,0.000,7490.115,1.81e+04
treatment_period,2269.7390,2633.339,0.862,0.389,-2891.511,7430.989
treatment_1,-1392.2926,3017.881,-0.461,0.645,-7307.231,4522.646
treatment_period:treatment_1,-2879.1394,3502.719,-0.822,0.411,-9744.342,3986.063

0,1,2,3
Omnibus:,116.238,Durbin-Watson:,0.804
Prob(Omnibus):,0.0,Jarque-Bera (JB):,737.397
Skew:,2.455,Prob(JB):,7.52e-161
Kurtosis:,11.583,Cond. No.,8.63


In [87]:
initial_education_data = initial[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
initial_education_data.dropna(inplace=True)
initial_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=initial_education_data).fit(cov_type="HC3")
initial_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.397
Model:,OLS,Adj. R-squared:,0.365
Method:,Least Squares,F-statistic:,22.98
Date:,"Wed, 10 May 2023",Prob (F-statistic):,2.51e-25
Time:,21:40:50,Log-Likelihood:,-1868.2
No. Observations:,181,AIC:,3756.0
Df Residuals:,171,BIC:,3788.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,8058.4004,1106.998,7.280,0.000,5888.725,1.02e+04
C(region)[T.Мурманская область],1.153e+04,1379.848,8.357,0.000,8826.286,1.42e+04
C(region)[T.Новгородская область],-997.7977,1271.282,-0.785,0.433,-3489.465,1493.870
C(region)[T.Приморский край],2400.4619,1272.387,1.887,0.059,-93.370,4894.294
C(region)[T.Республика Алтай],9220.6642,1505.286,6.126,0.000,6270.358,1.22e+04
C(region)[T.Тульская область],1540.0231,898.661,1.714,0.087,-221.321,3301.367
C(region)[T.Хабаровский край],1.452e+04,2542.740,5.711,0.000,9537.363,1.95e+04
treatment_period,471.6581,1826.353,0.258,0.796,-3107.927,4051.244
treatment_1,1112.3109,2075.485,0.536,0.592,-2955.564,5180.186

0,1,2,3
Omnibus:,98.409,Durbin-Watson:,1.222
Prob(Omnibus):,0.0,Jarque-Bera (JB):,801.649
Skew:,1.856,Prob(JB):,8.4e-175
Kurtosis:,12.619,Cond. No.,9.07


### Models for difference

In [88]:
difference_roads_data = difference[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
difference_roads_data.dropna(inplace=True)
difference_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=difference_roads_data).fit(cov_type="cluster", cov_kwds={'groups': difference_roads_data['region']})
difference_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.012
Model:,OLS,Adj. R-squared:,-0.009
Method:,Least Squares,F-statistic:,30.14
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.000514
Time:,21:40:51,Log-Likelihood:,-1175.6
No. Observations:,140,AIC:,2359.0
Df Residuals:,136,BIC:,2371.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,503.6857,180.838,2.785,0.005,149.249,858.122
treatment_period,166.3760,169.962,0.979,0.328,-166.743,499.495
treatment_1,-305.2615,310.088,-0.984,0.325,-913.022,302.499
treatment_period:treatment_1,-343.2636,309.962,-1.107,0.268,-950.778,264.250

0,1,2,3
Omnibus:,94.537,Durbin-Watson:,2.053
Prob(Omnibus):,0.0,Jarque-Bera (JB):,508.546
Skew:,2.523,Prob(JB):,3.72e-111
Kurtosis:,10.856,Cond. No.,13.6


In [89]:
difference_roads_data = difference[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
difference_roads_data.dropna(inplace=True)
difference_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=difference_roads_data).fit(cov_type="HC3")
difference_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.104
Model:,OLS,Adj. R-squared:,0.042
Method:,Least Squares,F-statistic:,2.245
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.0229
Time:,21:40:51,Log-Likelihood:,-1168.7
No. Observations:,140,AIC:,2357.0
Df Residuals:,130,BIC:,2387.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,904.7403,319.911,2.828,0.005,277.726,1531.755
C(region)[T.Мурманская область],143.1100,814.675,0.176,0.861,-1453.624,1739.844
C(region)[T.Новгородская область],-760.0748,328.722,-2.312,0.021,-1404.358,-115.792
C(region)[T.Приморский край],-780.4181,328.434,-2.376,0.017,-1424.136,-136.700
C(region)[T.Республика Алтай],-280.2029,289.059,-0.969,0.332,-846.748,286.342
C(region)[T.Тульская область],-352.5013,438.618,-0.804,0.422,-1212.178,507.175
C(region)[T.Хабаровский край],-22.4280,318.869,-0.070,0.944,-647.400,602.544
treatment_period,-155.9430,313.532,-0.497,0.619,-770.455,458.568
treatment_1,-345.9656,370.363,-0.934,0.350,-1071.863,379.932

0,1,2,3
Omnibus:,81.506,Durbin-Watson:,2.193
Prob(Omnibus):,0.0,Jarque-Bera (JB):,355.355
Skew:,2.185,Prob(JB):,6.85e-78
Kurtosis:,9.467,Cond. No.,14.7


In [90]:
difference_housing_data = difference[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
difference_housing_data.dropna(inplace=True)
difference_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=difference_housing_data).fit(cov_type="cluster", cov_kwds={'groups': difference_housing_data['region']})
difference_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.003
Model:,OLS,Adj. R-squared:,-0.018
Method:,Least Squares,F-statistic:,14.98
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.00342
Time:,21:40:52,Log-Likelihood:,-1461.4
No. Observations:,146,AIC:,2931.0
Df Residuals:,142,BIC:,2943.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,778.2006,648.009,1.201,0.230,-491.873,2048.274
treatment_period,620.2855,641.487,0.967,0.334,-637.005,1877.576
treatment_1,56.7152,660.005,0.086,0.932,-1236.871,1350.301
treatment_period:treatment_1,-628.4255,651.485,-0.965,0.335,-1905.313,648.462

0,1,2,3
Omnibus:,175.88,Durbin-Watson:,1.777
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10788.928
Skew:,-4.225,Prob(JB):,0.0
Kurtosis:,44.257,Cond. No.,13.8


In [91]:
difference_housing_data = difference[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
difference_housing_data.dropna(inplace=True)
difference_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=difference_housing_data).fit(cov_type="HC3")
difference_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.033
Model:,OLS,Adj. R-squared:,-0.031
Method:,Least Squares,F-statistic:,1.618
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.116
Time:,21:40:52,Log-Likelihood:,-1459.2
No. Observations:,146,AIC:,2938.0
Df Residuals:,136,BIC:,2968.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-567.7578,2838.002,-0.200,0.841,-6130.140,4994.624
C(region)[T.Мурманская область],1488.0884,2861.807,0.520,0.603,-4120.951,7097.127
C(region)[T.Новгородская область],1045.3154,2840.787,0.368,0.713,-4522.525,6613.156
C(region)[T.Приморский край],788.6823,2810.008,0.281,0.779,-4718.833,6296.198
C(region)[T.Республика Алтай],2175.2507,2052.665,1.060,0.289,-1847.899,6198.401
C(region)[T.Тульская область],2459.3706,2935.009,0.838,0.402,-3293.141,8211.882
C(region)[T.Хабаровский край],2567.3918,2393.298,1.073,0.283,-2123.387,7258.170
treatment_period,235.0962,1741.177,0.135,0.893,-3177.547,3647.740
treatment_1,-33.4365,733.766,-0.046,0.964,-1471.591,1404.718

0,1,2,3
Omnibus:,175.072,Durbin-Watson:,1.83
Prob(Omnibus):,0.0,Jarque-Bera (JB):,9763.128
Skew:,-4.239,Prob(JB):,0.0
Kurtosis:,42.154,Cond. No.,15.0


In [92]:
difference_education_data = difference[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
difference_education_data.dropna(inplace=True)
difference_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=difference_education_data).fit(cov_type="cluster", cov_kwds={'groups': difference_education_data['region']})
difference_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.03
Model:,OLS,Adj. R-squared:,0.01
Method:,Least Squares,F-statistic:,17.51
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.00227
Time:,21:40:52,Log-Likelihood:,-1487.2
No. Observations:,151,AIC:,2982.0
Df Residuals:,147,BIC:,2994.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,2776.4708,1375.327,2.019,0.044,80.880,5472.061
treatment_period,1825.0073,1671.018,1.092,0.275,-1450.127,5100.142
treatment_1,516.0569,815.543,0.633,0.527,-1082.379,2114.493
treatment_period:treatment_1,-2697.3418,795.151,-3.392,0.001,-4255.810,-1138.874

0,1,2,3
Omnibus:,81.408,Durbin-Watson:,1.451
Prob(Omnibus):,0.0,Jarque-Bera (JB):,305.485
Skew:,2.117,Prob(JB):,4.6199999999999994e-67
Kurtosis:,8.535,Cond. No.,13.9


In [93]:
difference_education_data = difference[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
difference_education_data.dropna(inplace=True)
difference_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=difference_education_data).fit(cov_type="HC3")
difference_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.267
Model:,OLS,Adj. R-squared:,0.22
Method:,Least Squares,F-statistic:,12.82
Date:,"Wed, 10 May 2023",Prob (F-statistic):,9.13e-15
Time:,21:40:53,Log-Likelihood:,-1466.0
No. Observations:,151,AIC:,2952.0
Df Residuals:,141,BIC:,2982.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,7257.8560,1042.290,6.963,0.000,5215.005,9300.707
C(region)[T.Мурманская область],-6718.0225,1022.363,-6.571,0.000,-8721.818,-4714.227
C(region)[T.Новгородская область],-5750.7817,1223.455,-4.700,0.000,-8148.709,-3352.854
C(region)[T.Приморский край],-7136.8997,1048.672,-6.806,0.000,-9192.259,-5081.541
C(region)[T.Республика Алтай],-2027.4069,1130.408,-1.794,0.073,-4242.965,188.151
C(region)[T.Тульская область],-5118.3152,1364.475,-3.751,0.000,-7792.637,-2443.993
C(region)[T.Хабаровский край],-2972.8851,1346.910,-2.207,0.027,-5612.780,-332.990
treatment_period,-807.2704,1152.357,-0.701,0.484,-3065.848,1451.307
treatment_1,712.1772,1344.368,0.530,0.596,-1922.736,3347.090

0,1,2,3
Omnibus:,100.161,Durbin-Watson:,1.755
Prob(Omnibus):,0.0,Jarque-Bera (JB):,825.751
Skew:,2.291,Prob(JB):,4.899999999999999e-180
Kurtosis:,13.5,Cond. No.,14.9


# Goods t-1 

In [94]:
with open("D:/DZ/Course_6/Diploma/Data/full_ind_data.csv", "rb") as inp:
    full_ind_data = pickle.load(inp)

In [95]:
with open("D:/DZ/Course_6/Diploma/Data/mun_data_50.csv", "rb") as inp:
    mun_data = pd.read_csv(inp, index_col=0)

In [96]:
mun_data.reset_index(inplace=True, drop=True)

In [97]:
full_ind_data.shape

(1386, 14)

In [98]:
full_ind_data.head()

Unnamed: 0,raion,region,treatment_1,treatment_2,year,bad_roads,bad_schools,unfinished_construction,housing,population,lack_transportation,heat_grid_fixed,water_grid_fixed,sewage_grid_fixed
0,Владивостокский,Приморский край,1,1,2013,,,3400.0,4.05,625868.0,,0.05024,0.00447,0.001211
1,Владивостокский,Приморский край,1,1,2014,,,3400.0,3.88,630027.0,,0.02067,0.008552,0.004417
2,Владивостокский,Приморский край,1,1,2015,,0.0,21529.93,5.0,631387.0,,0.019753,0.009448,0.003201
3,Владивостокский,Приморский край,1,1,2016,,0.0,2411.41,5.17,633167.0,,0.011566,0.006601,0.001074
4,Владивостокский,Приморский край,1,1,2017,,0.0,,3.19,633414.0,,0.013158,0.003128,0.003201


In [99]:
full_in_data = pd.merge(full_ind_data, mun_data, on=["raion", "region"], how="left")

In [100]:
full_ind_data = full_in_data.drop(columns=["treatment_1_x", "treatment_2_x"]).rename(columns={"treatment_1_y": "treatment_1", "treatment_2_y": "treatment_2"})

In [101]:
full_ind_data.shape

(1386, 14)

In [102]:
full_ind_data.head()

Unnamed: 0,raion,region,year,bad_roads,bad_schools,unfinished_construction,housing,population,lack_transportation,heat_grid_fixed,water_grid_fixed,sewage_grid_fixed,treatment_1,treatment_2
0,Владивостокский,Приморский край,2013,,,3400.0,4.05,625868.0,,0.05024,0.00447,0.001211,0,1
1,Владивостокский,Приморский край,2014,,,3400.0,3.88,630027.0,,0.02067,0.008552,0.004417,0,1
2,Владивостокский,Приморский край,2015,,0.0,21529.93,5.0,631387.0,,0.019753,0.009448,0.003201,0,1
3,Владивостокский,Приморский край,2016,,0.0,2411.41,5.17,633167.0,,0.011566,0.006601,0.001074,0,1
4,Владивостокский,Приморский край,2017,,0.0,,3.19,633414.0,,0.013158,0.003128,0.003201,0,1


In [103]:
two_time_points = pd.DataFrame()
for i in mapping.keys():
    two_time_points = two_time_points.append(full_ind_data.loc[(full_ind_data.region == i) & ((full_ind_data.year == mapping[i]) | (full_ind_data.year == mapping[i] + 4))])

two_time_points.reset_index(inplace=True, drop=True)

two_time_points["treatment_period"] = [1 if i > 2018 else 0 for i in two_time_points.year]

In [104]:
treat_1_roads = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_roads"]].copy()
treat_1_roads.dropna(inplace=True)

roads = sm_f.ols(formula="bad_roads ~ treatment_period*treatment_1", data=treat_1_roads).fit(cov_type="cluster", cov_kwds={'groups': treat_1_roads['region']})
roads.summary()

0,1,2,3
Dep. Variable:,bad_roads,R-squared:,0.014
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,1.345
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.345
Time:,21:40:58,Log-Likelihood:,-992.95
No. Observations:,212,AIC:,1994.0
Df Residuals:,208,BIC:,2007.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,55.9842,7.891,7.095,0.000,40.519,71.449
treatment_period,-5.1334,8.174,-0.628,0.530,-21.155,10.888
treatment_1,6.9552,7.209,0.965,0.335,-7.175,21.085
treatment_period:treatment_1,-7.5060,7.117,-1.055,0.292,-21.455,6.443

0,1,2,3
Omnibus:,30.415,Durbin-Watson:,0.947
Prob(Omnibus):,0.0,Jarque-Bera (JB):,9.548
Skew:,-0.214,Prob(JB):,0.00845
Kurtosis:,2.053,Cond. No.,9.1


In [105]:
treat_1_roads = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_roads"]].copy()
treat_1_roads.dropna(inplace=True)

roads = sm_f.ols(formula="bad_roads ~ treatment_period*treatment_1 + C(region)", data=treat_1_roads).fit(cov_type="HC3")
roads.summary()

0,1,2,3
Dep. Variable:,bad_roads,R-squared:,0.352
Model:,OLS,Adj. R-squared:,0.323
Method:,Least Squares,F-statistic:,11.5
Date:,"Wed, 10 May 2023",Prob (F-statistic):,1.64e-14
Time:,21:40:58,Log-Likelihood:,-948.49
No. Observations:,212,AIC:,1917.0
Df Residuals:,202,BIC:,1951.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,56.2963,4.319,13.034,0.000,47.831,64.762
C(region)[T.Мурманская область],-19.2529,6.399,-3.009,0.003,-31.794,-6.712
C(region)[T.Новгородская область],-3.4894,4.960,-0.703,0.482,-13.211,6.233
C(region)[T.Республика Алтай],23.4355,6.680,3.508,0.000,10.342,36.529
C(region)[T.Республика Марий Эл],17.2767,5.360,3.224,0.001,6.772,27.781
C(region)[T.Тульская область],-8.1213,5.782,-1.405,0.160,-19.453,3.211
C(region)[T.Хабаровский край],-29.1705,6.706,-4.350,0.000,-42.314,-16.027
treatment_period,0.1859,4.660,0.040,0.968,-8.947,9.319
treatment_1,9.7774,5.277,1.853,0.064,-0.565,20.120

0,1,2,3
Omnibus:,2.368,Durbin-Watson:,1.433
Prob(Omnibus):,0.306,Jarque-Bera (JB):,2.134
Skew:,-0.119,Prob(JB):,0.344
Kurtosis:,3.43,Cond. No.,9.9


In [106]:
treat_1_schools = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_schools"]].copy()
treat_1_schools.dropna(inplace=True)

schools = sm_f.ols(formula="bad_schools ~ treatment_period*treatment_1", data=treat_1_schools).fit(cov_type="cluster", cov_kwds={'groups': treat_1_schools['region']})
schools.summary()

0,1,2,3
Dep. Variable:,bad_schools,R-squared:,0.141
Model:,OLS,Adj. R-squared:,0.122
Method:,Least Squares,F-statistic:,48.4
Date:,"Wed, 10 May 2023",Prob (F-statistic):,4.77e-05
Time:,21:40:59,Log-Likelihood:,-506.97
No. Observations:,139,AIC:,1022.0
Df Residuals:,135,BIC:,1034.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3.0168,1.180,2.556,0.011,0.703,5.330
treatment_period,14.6499,3.942,3.717,0.000,6.924,22.375
treatment_1,2.2132,3.115,0.711,0.477,-3.892,8.318
treatment_period:treatment_1,-2.8799,4.889,-0.589,0.556,-12.461,6.702

0,1,2,3
Omnibus:,115.422,Durbin-Watson:,1.996
Prob(Omnibus):,0.0,Jarque-Bera (JB):,875.612
Skew:,3.126,Prob(JB):,7.299999999999999e-191
Kurtosis:,13.588,Cond. No.,13.2


In [107]:
treat_1_schools = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_schools"]].copy()
treat_1_schools.dropna(inplace=True)

schools = sm_f.ols(formula="bad_schools ~ treatment_period*treatment_1 + C(region)", data=treat_1_schools).fit(cov_type="HC3")
schools.summary()

0,1,2,3
Dep. Variable:,bad_schools,R-squared:,0.37
Model:,OLS,Adj. R-squared:,0.321
Method:,Least Squares,F-statistic:,3.791
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.00017
Time:,21:40:59,Log-Likelihood:,-485.45
No. Observations:,139,AIC:,992.9
Df Residuals:,128,BIC:,1025.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,28.8500,29.911,0.965,0.335,-29.774,87.474
C(region)[T.Мурманская область],-42.7782,36.665,-1.167,0.243,-114.640,29.084
C(region)[T.Новгородская область],-26.2166,29.937,-0.876,0.381,-84.891,32.458
C(region)[T.Приморский край],-29.4897,29.915,-0.986,0.324,-88.123,29.143
C(region)[T.Республика Алтай],-23.9318,30.016,-0.797,0.425,-82.762,34.899
C(region)[T.Республика Марий Эл],-27.4570,29.931,-0.917,0.359,-86.120,31.206
C(region)[T.Тульская область],-24.6559,29.979,-0.822,0.411,-83.414,34.102
C(region)[T.Хабаровский край],-7.1833,34.597,-0.208,0.836,-74.993,60.626
treatment_period,18.3282,4.126,4.442,0.000,10.241,26.415

0,1,2,3
Omnibus:,73.612,Durbin-Watson:,2.176
Prob(Omnibus):,0.0,Jarque-Bera (JB):,277.655
Skew:,2.012,Prob(JB):,5.1e-61
Kurtosis:,8.634,Cond. No.,26.8


In [108]:
treat_1_sewerage = two_time_points[["raion", "region", "treatment_1", "treatment_period", "sewage_grid_fixed"]].copy()
treat_1_sewerage.dropna(inplace=True)

sewerage = sm_f.ols(formula="sewage_grid_fixed ~ treatment_period*treatment_1", data=treat_1_sewerage).fit(cov_type="cluster", cov_kwds={'groups': treat_1_sewerage['region']})
sewerage.summary()

0,1,2,3
Dep. Variable:,sewage_grid_fixed,R-squared:,0.014
Model:,OLS,Adj. R-squared:,-0.015
Method:,Least Squares,F-statistic:,4.999
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.0577
Time:,21:41:00,Log-Likelihood:,121.5
No. Observations:,107,AIC:,-235.0
Df Residuals:,103,BIC:,-224.3
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0139,0.004,3.413,0.001,0.006,0.022
treatment_period,0.0181,0.017,1.094,0.274,-0.014,0.051
treatment_1,0.0189,0.021,0.882,0.378,-0.023,0.061
treatment_period:treatment_1,-0.0495,0.036,-1.367,0.172,-0.120,0.021

0,1,2,3
Omnibus:,197.277,Durbin-Watson:,1.55
Prob(Omnibus):,0.0,Jarque-Bera (JB):,20503.65
Skew:,7.718,Prob(JB):,0.0
Kurtosis:,69.035,Cond. No.,9.37


In [109]:
treat_1_sewerage = two_time_points[["raion", "region", "treatment_1", "treatment_period", "sewage_grid_fixed"]].copy()
treat_1_sewerage.dropna(inplace=True)

sewerage = sm_f.ols(formula="sewage_grid_fixed ~ treatment_period*treatment_1 + C(region)", data=treat_1_sewerage).fit(cov_type="HC3")
sewerage.summary()

0,1,2,3
Dep. Variable:,sewage_grid_fixed,R-squared:,0.063
Model:,OLS,Adj. R-squared:,-0.013
Method:,Least Squares,F-statistic:,0.7842
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.618
Time:,21:41:00,Log-Likelihood:,124.24
No. Observations:,107,AIC:,-230.5
Df Residuals:,98,BIC:,-206.4
Df Model:,8,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0079,0.005,1.590,0.112,-0.002,0.018
C(region)[T.Мурманская область],-0.0046,0.014,-0.336,0.737,-0.031,0.022
C(region)[T.Новгородская область],0.0021,0.013,0.155,0.877,-0.024,0.028
C(region)[T.Приморский край],0.0394,0.022,1.757,0.079,-0.005,0.083
C(region)[T.Тульская область],-0.0066,0.012,-0.570,0.569,-0.029,0.016
C(region)[T.Хабаровский край],0.0020,0.005,0.378,0.705,-0.008,0.012
treatment_period,0.0054,0.023,0.240,0.810,-0.039,0.050
treatment_1,0.0190,0.029,0.656,0.512,-0.038,0.076
treatment_period:treatment_1,-0.0483,0.047,-1.030,0.303,-0.140,0.044

0,1,2,3
Omnibus:,194.011,Durbin-Watson:,1.65
Prob(Omnibus):,0.0,Jarque-Bera (JB):,19632.683
Skew:,7.476,Prob(JB):,0.0
Kurtosis:,67.653,Cond. No.,15.2


In [110]:
treat_1_heat = two_time_points[["raion", "region", "treatment_1", "treatment_period", "heat_grid_fixed"]].copy()
treat_1_heat.dropna(inplace=True)

heat = sm_f.ols(formula="heat_grid_fixed ~ treatment_period*treatment_1", data=treat_1_heat).fit(cov_type="HC3") # т.к. в выборке только один регион
heat.summary()

0,1,2,3
Dep. Variable:,heat_grid_fixed,R-squared:,0.015
Model:,OLS,Adj. R-squared:,-0.04
Method:,Least Squares,F-statistic:,0.301
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.825
Time:,21:41:00,Log-Likelihood:,120.76
No. Observations:,58,AIC:,-233.5
Df Residuals:,54,BIC:,-225.3
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0240,0.003,7.359,0.000,0.018,0.030
treatment_period,0.0069,0.009,0.768,0.443,-0.011,0.025
treatment_1,0.0077,0.030,0.256,0.798,-0.052,0.067
treatment_period:treatment_1,-0.0195,0.033,-0.585,0.558,-0.085,0.046

0,1,2,3
Omnibus:,99.906,Durbin-Watson:,2.097
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2458.052
Skew:,5.098,Prob(JB):,0.0
Kurtosis:,33.219,Cond. No.,10.3


In [111]:
treat_1_water = two_time_points[["raion", "region", "treatment_1", "treatment_period", "water_grid_fixed"]].copy()
treat_1_water.dropna(inplace=True)

water = sm_f.ols(formula="water_grid_fixed ~ treatment_period*treatment_1", data=treat_1_water).fit(cov_type="cluster", cov_kwds={'groups': treat_1_water['region']})
water.summary()

0,1,2,3
Dep. Variable:,water_grid_fixed,R-squared:,0.004
Model:,OLS,Adj. R-squared:,-0.015
Method:,Least Squares,F-statistic:,7.192
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.0291
Time:,21:41:01,Log-Likelihood:,226.68
No. Observations:,158,AIC:,-445.4
Df Residuals:,154,BIC:,-433.1
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0264,0.009,2.816,0.005,0.008,0.045
treatment_period,-0.0077,0.007,-1.163,0.245,-0.021,0.005
treatment_1,-0.0093,0.016,-0.591,0.554,-0.040,0.022
treatment_period:treatment_1,0.0154,0.013,1.175,0.240,-0.010,0.041

0,1,2,3
Omnibus:,301.383,Durbin-Watson:,1.666
Prob(Omnibus):,0.0,Jarque-Bera (JB):,74468.592
Skew:,9.575,Prob(JB):,0.0
Kurtosis:,107.618,Cond. No.,11.6


In [112]:
treat_1_water = two_time_points[["raion", "region", "treatment_1", "treatment_period", "water_grid_fixed"]].copy()
treat_1_water.dropna(inplace=True)

water = sm_f.ols(formula="water_grid_fixed ~ treatment_period*treatment_1 + C(region)", data=treat_1_water).fit(cov_type="HC3")
water.summary()

0,1,2,3
Dep. Variable:,water_grid_fixed,R-squared:,0.087
Model:,OLS,Adj. R-squared:,0.038
Method:,Least Squares,F-statistic:,5.529
Date:,"Wed, 10 May 2023",Prob (F-statistic):,3.99e-06
Time:,21:41:01,Log-Likelihood:,233.49
No. Observations:,158,AIC:,-449.0
Df Residuals:,149,BIC:,-421.4
Df Model:,8,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0121,0.004,3.278,0.001,0.005,0.019
C(region)[T.Мурманская область],0.0521,0.038,1.381,0.167,-0.022,0.126
C(region)[T.Новгородская область],0.0005,0.007,0.074,0.941,-0.014,0.015
C(region)[T.Приморский край],0.0127,0.008,1.676,0.094,-0.002,0.028
C(region)[T.Республика Алтай],0.0013,0.006,0.221,0.825,-0.011,0.013
C(region)[T.Хабаровский край],0.0214,0.007,3.093,0.002,0.008,0.035
treatment_period,-0.0109,0.014,-0.755,0.450,-0.039,0.017
treatment_1,-0.0080,0.018,-0.453,0.650,-0.043,0.027
treatment_period:treatment_1,0.0250,0.023,1.110,0.267,-0.019,0.069

0,1,2,3
Omnibus:,292.255,Durbin-Watson:,1.804
Prob(Omnibus):,0.0,Jarque-Bera (JB):,65202.624
Skew:,9.027,Prob(JB):,0.0
Kurtosis:,100.868,Cond. No.,12.8


# Fin t-1

In [113]:
with open("D:/DZ/Course_6/Diploma/Data/financial_data.csv", "rb") as inp:
    temp_fin = pickle.load(inp)

In [114]:
with open("D:/DZ/Course_6/Diploma/Data/mun_data_50.csv", "rb") as inp:
    data_for_models = pd.read_csv(inp, index_col=0).reset_index(drop=True)

In [115]:
mapping = {"Приморский край": 2016, "Мурманская область": 2016, "Костромская область": 2015, "Республика Алтай": 2014, "Республика Марий Эл": 2014, "Хабаровский край": 2014, 
           "Тульская область": 2014, "Новгородская область": 2016}

In [116]:
actual, initial, difference = temp_fin.loc[temp_fin.budget_type == "actual_spending"], temp_fin.loc[temp_fin.budget_type == "initial_budget"], temp_fin.loc[temp_fin.budget_type == "разница"]

In [117]:
actual = pd.pivot(actual, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()
initial = pd.pivot(initial, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()
difference = pd.pivot(difference, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()

In [118]:
for i in actual, initial, difference:
    i.rename(columns={"дорожное хозяйство (дорожные фонды)": "roads", "жилищно-коммунальное хозяйство": "housing", "образование": "education"}, inplace=True)

In [119]:
actual.head()

sub_indicator,region,raion,year,roads,housing,education
0,Костромская область,Антроповский район,2013,666.115702,1610.368144,12882.945154
1,Костромская область,Антроповский район,2014,603.232937,1922.79625,12638.11448
2,Костромская область,Антроповский район,2015,688.90871,95.255228,21901.154847
3,Костромская область,Антроповский район,2016,764.043811,203.425719,10986.12319
4,Костромская область,Антроповский район,2017,,,


In [120]:
frames = [actual, initial, difference]

In [121]:
for i in range(len(frames)):
    temp_fin_data = pd.DataFrame()
    for j in mapping.keys():
        temp_fin_data = temp_fin_data.append(frames[i].loc[(frames[i].region == j) & ((frames[i].year == mapping[j]) | (frames[i].year == mapping[j] + 4))])
    temp_fin_data["treatment_period"] = [1 if i > 2018 else 0 for i in temp_fin_data.year]
    frames[i] = pd.merge(temp_fin_data, data_for_models, on=["region", "raion"], how="inner")

In [122]:
#frames[0]

In [123]:
actual, initial, difference = frames

### Models for actual budgets

In [124]:
actual_roads_data = actual[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
actual_roads_data.dropna(inplace=True)
actual_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=actual_roads_data).fit(cov_type="cluster", cov_kwds={'groups': actual_roads_data['region']})
actual_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,1.919
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.215
Time:,21:41:25,Log-Likelihood:,-2315.0
No. Observations:,268,AIC:,4638.0
Df Residuals:,264,BIC:,4652.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1110.0927,171.543,6.471,0.000,773.874,1446.312
treatment_period,417.4527,259.151,1.611,0.107,-90.475,925.380
treatment_1,-34.1387,312.048,-0.109,0.913,-645.741,577.464
treatment_period:treatment_1,-314.3115,492.698,-0.638,0.524,-1279.981,651.358

0,1,2,3
Omnibus:,117.431,Durbin-Watson:,1.819
Prob(Omnibus):,0.0,Jarque-Bera (JB):,372.44
Skew:,1.977,Prob(JB):,1.34e-81
Kurtosis:,7.209,Cond. No.,11.0


In [125]:
actual_roads_data = actual[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
actual_roads_data.dropna(inplace=True)
actual_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=actual_roads_data).fit(cov_type="HC3")
actual_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.145
Model:,OLS,Adj. R-squared:,0.112
Method:,Least Squares,F-statistic:,6.314
Date:,"Wed, 10 May 2023",Prob (F-statistic):,1.16e-08
Time:,21:41:27,Log-Likelihood:,-2296.1
No. Observations:,268,AIC:,4614.0
Df Residuals:,257,BIC:,4654.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1160.7882,277.445,4.184,0.000,617.006,1704.570
C(region)[T.Мурманская область],915.9158,606.876,1.509,0.131,-273.540,2105.371
C(region)[T.Новгородская область],-886.0917,314.034,-2.822,0.005,-1501.588,-270.596
C(region)[T.Приморский край],-441.5502,286.321,-1.542,0.123,-1002.729,119.628
C(region)[T.Республика Алтай],632.2268,453.729,1.393,0.163,-257.065,1521.519
C(region)[T.Республика Марий Эл],-388.8398,310.333,-1.253,0.210,-997.081,219.402
C(region)[T.Тульская область],465.4485,404.784,1.150,0.250,-327.914,1258.811
C(region)[T.Хабаровский край],158.5869,366.094,0.433,0.665,-558.944,876.118
treatment_period,790.7698,220.315,3.589,0.000,358.960,1222.580

0,1,2,3
Omnibus:,119.572,Durbin-Watson:,2.031
Prob(Omnibus):,0.0,Jarque-Bera (JB):,419.419
Skew:,1.957,Prob(JB):,8.4e-92
Kurtosis:,7.715,Cond. No.,11.9


In [126]:
actual_housing_data = actual[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
actual_housing_data.dropna(inplace=True)
actual_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=actual_housing_data).fit(cov_type="cluster", cov_kwds={'groups': actual_housing_data['region']})
actual_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.017
Model:,OLS,Adj. R-squared:,0.006
Method:,Least Squares,F-statistic:,4.187
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.0542
Time:,21:41:28,Log-Likelihood:,-2821.8
No. Observations:,265,AIC:,5652.0
Df Residuals:,261,BIC:,5666.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,4615.9053,2815.094,1.640,0.101,-901.578,1.01e+04
treatment_period,-2751.7189,2842.668,-0.968,0.333,-8323.247,2819.809
treatment_1,-3171.3576,2786.011,-1.138,0.255,-8631.838,2289.123
treatment_period:treatment_1,1729.5399,2830.531,0.611,0.541,-3818.199,7277.279

0,1,2,3
Omnibus:,363.014,Durbin-Watson:,1.12
Prob(Omnibus):,0.0,Jarque-Bera (JB):,27272.6
Skew:,6.506,Prob(JB):,0.0
Kurtosis:,50.965,Cond. No.,11.0


In [127]:
actual_housing_data = actual[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
actual_housing_data.dropna(inplace=True)
actual_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=actual_housing_data).fit(cov_type="HC3")
actual_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.26
Model:,OLS,Adj. R-squared:,0.231
Method:,Least Squares,F-statistic:,9.106
Date:,"Wed, 10 May 2023",Prob (F-statistic):,7.83e-13
Time:,21:41:28,Log-Likelihood:,-2784.1
No. Observations:,265,AIC:,5590.0
Df Residuals:,254,BIC:,5630.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,2351.7021,503.578,4.670,0.000,1364.706,3338.698
C(region)[T.Мурманская область],991.0264,701.397,1.413,0.158,-383.686,2365.739
C(region)[T.Новгородская область],-2298.4542,533.656,-4.307,0.000,-3344.400,-1252.508
C(region)[T.Приморский край],-1419.0358,496.934,-2.856,0.004,-2393.009,-445.063
C(region)[T.Республика Алтай],129.5032,638.203,0.203,0.839,-1121.351,1380.358
C(region)[T.Республика Марий Эл],-1071.8035,532.178,-2.014,0.044,-2114.854,-28.753
C(region)[T.Тульская область],157.2195,673.821,0.233,0.816,-1163.445,1477.884
C(region)[T.Хабаровский край],1.403e+04,3868.980,3.626,0.000,6445.271,2.16e+04
treatment_period,878.2828,252.238,3.482,0.000,383.906,1372.660

0,1,2,3
Omnibus:,340.497,Durbin-Watson:,1.433
Prob(Omnibus):,0.0,Jarque-Bera (JB):,24266.833
Skew:,5.79,Prob(JB):,0.0
Kurtosis:,48.427,Cond. No.,11.9


In [128]:
actual_education_data = actual[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
actual_education_data.dropna(inplace=True)
actual_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=actual_education_data).fit(cov_type="cluster", cov_kwds={'groups': actual_education_data['region']})
actual_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.011
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,6.61
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.0189
Time:,21:41:28,Log-Likelihood:,-2812.7
No. Observations:,268,AIC:,5633.0
Df Residuals:,264,BIC:,5648.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.547e+04,2945.390,5.251,0.000,9693.268,2.12e+04
treatment_period,-1566.3525,3021.604,-0.518,0.604,-7488.588,4355.883
treatment_1,-2192.6656,2855.621,-0.768,0.443,-7789.579,3404.248
treatment_period:treatment_1,7720.1327,2938.735,2.627,0.009,1960.319,1.35e+04

0,1,2,3
Omnibus:,172.98,Durbin-Watson:,0.831
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1384.295
Skew:,2.588,Prob(JB):,2.54e-301
Kurtosis:,12.857,Cond. No.,11.0


In [129]:
actual_education_data = actual[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
actual_education_data.dropna(inplace=True)
actual_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=actual_education_data).fit(cov_type="HC3")
actual_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.445
Model:,OLS,Adj. R-squared:,0.423
Method:,Least Squares,F-statistic:,45.31
Date:,"Wed, 10 May 2023",Prob (F-statistic):,3.98e-51
Time:,21:41:29,Log-Likelihood:,-2735.3
No. Observations:,268,AIC:,5493.0
Df Residuals:,257,BIC:,5532.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.634e+04,1082.405,15.092,0.000,1.42e+04,1.85e+04
C(region)[T.Мурманская область],2979.1335,1229.206,2.424,0.015,569.934,5388.334
C(region)[T.Новгородская область],-7470.7257,1257.774,-5.940,0.000,-9935.918,-5005.534
C(region)[T.Приморский край],-4910.9698,1181.726,-4.156,0.000,-7227.110,-2594.829
C(region)[T.Республика Алтай],4278.7607,1645.099,2.601,0.009,1054.427,7503.095
C(region)[T.Республика Марий Эл],-7377.8731,1141.270,-6.465,0.000,-9614.721,-5141.025
C(region)[T.Тульская область],-5873.2813,1118.174,-5.253,0.000,-8064.863,-3681.700
C(region)[T.Хабаровский край],9755.2672,2639.358,3.696,0.000,4582.220,1.49e+04
treatment_period,2162.6424,776.977,2.783,0.005,639.795,3685.490

0,1,2,3
Omnibus:,170.318,Durbin-Watson:,1.251
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1894.623
Skew:,2.379,Prob(JB):,0.0
Kurtosis:,15.126,Cond. No.,11.9


### Models for initial budgets

In [130]:
initial_roads_data = initial[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
initial_roads_data.dropna(inplace=True)
initial_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=initial_roads_data).fit(cov_type="cluster", cov_kwds={'groups': initial_roads_data['region']})
initial_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.027
Model:,OLS,Adj. R-squared:,0.013
Method:,Least Squares,F-statistic:,8.996
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.0122
Time:,21:41:29,Log-Likelihood:,-2097.8
No. Observations:,225,AIC:,4204.0
Df Residuals:,221,BIC:,4217.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,529.9112,88.788,5.968,0.000,355.890,703.933
treatment_period,967.7553,553.750,1.748,0.081,-117.574,2053.085
treatment_1,269.8522,142.656,1.892,0.059,-9.748,549.452
treatment_period:treatment_1,-975.9471,593.250,-1.645,0.100,-2138.695,186.801

0,1,2,3
Omnibus:,466.813,Durbin-Watson:,2.031
Prob(Omnibus):,0.0,Jarque-Bera (JB):,296590.281
Skew:,12.675,Prob(JB):,0.0
Kurtosis:,179.05,Cond. No.,8.64


In [131]:
initial_roads_data = initial[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
initial_roads_data.dropna(inplace=True)
initial_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=initial_roads_data).fit(cov_type="HC3")
initial_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.052
Model:,OLS,Adj. R-squared:,0.012
Method:,Least Squares,F-statistic:,3.611
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.000327
Time:,21:41:29,Log-Likelihood:,-2094.9
No. Observations:,225,AIC:,4210.0
Df Residuals:,215,BIC:,4244.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1138.2531,707.390,1.609,0.108,-248.206,2524.712
C(region)[T.Мурманская область],-201.9513,997.824,-0.202,0.840,-2157.651,1753.748
C(region)[T.Новгородская область],-1267.1519,941.178,-1.346,0.178,-3111.827,577.523
C(region)[T.Приморский край],-839.0351,918.413,-0.914,0.361,-2639.091,961.021
C(region)[T.Республика Алтай],-758.9169,713.468,-1.064,0.287,-2157.288,639.454
C(region)[T.Тульская область],-272.0862,711.501,-0.382,0.702,-1666.602,1122.430
C(region)[T.Хабаровский край],-525.7994,722.742,-0.728,0.467,-1942.348,890.749
treatment_period,1003.6417,509.440,1.970,0.049,5.158,2002.125
treatment_1,50.1086,316.346,0.158,0.874,-569.918,670.135

0,1,2,3
Omnibus:,464.043,Durbin-Watson:,2.091
Prob(Omnibus):,0.0,Jarque-Bera (JB):,287368.68
Skew:,12.52,Prob(JB):,0.0
Kurtosis:,176.279,Cond. No.,9.65


In [132]:
initial_housing_data = initial[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
initial_housing_data.dropna(inplace=True)
initial_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=initial_housing_data).fit(cov_type="cluster", cov_kwds={'groups': initial_housing_data['region']})
initial_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.004
Model:,OLS,Adj. R-squared:,-0.009
Method:,Least Squares,F-statistic:,1.29
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.36
Time:,21:41:30,Log-Likelihood:,-2554.2
No. Observations:,232,AIC:,5116.0
Df Residuals:,228,BIC:,5130.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,4311.9035,2882.489,1.496,0.135,-1337.671,9961.478
treatment_period,-515.8350,3099.403,-0.166,0.868,-6590.554,5558.884
treatment_1,-3550.8864,2873.688,-1.236,0.217,-9183.212,2081.439
treatment_period:treatment_1,417.2023,3187.494,0.131,0.896,-5830.171,6664.576

0,1,2,3
Omnibus:,359.914,Durbin-Watson:,1.631
Prob(Omnibus):,0.0,Jarque-Bera (JB):,49600.452
Skew:,7.573,Prob(JB):,0.0
Kurtosis:,73.012,Cond. No.,8.7


In [133]:
initial_housing_data = initial[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
initial_housing_data.dropna(inplace=True)
initial_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=initial_housing_data).fit(cov_type="HC3")
initial_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.1
Model:,OLS,Adj. R-squared:,0.064
Method:,Least Squares,F-statistic:,3.636
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.000296
Time:,21:41:30,Log-Likelihood:,-2542.5
No. Observations:,232,AIC:,5105.0
Df Residuals:,222,BIC:,5140.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,5438.9051,4302.456,1.264,0.206,-2993.754,1.39e+04
C(region)[T.Мурманская область],-4110.2829,4972.079,-0.827,0.408,-1.39e+04,5634.814
C(region)[T.Новгородская область],-6192.5455,4958.460,-1.249,0.212,-1.59e+04,3525.857
C(region)[T.Приморский край],-4774.7362,4950.751,-0.964,0.335,-1.45e+04,4928.557
C(region)[T.Республика Алтай],-4690.1848,4305.375,-1.089,0.276,-1.31e+04,3748.194
C(region)[T.Тульская область],-4460.5027,4257.581,-1.048,0.295,-1.28e+04,3884.202
C(region)[T.Хабаровский край],7891.3404,5557.783,1.420,0.156,-3001.714,1.88e+04
treatment_period,2233.8078,2090.089,1.069,0.285,-1862.692,6330.307
treatment_1,-405.8632,706.225,-0.575,0.565,-1790.039,978.313

0,1,2,3
Omnibus:,363.108,Durbin-Watson:,1.818
Prob(Omnibus):,0.0,Jarque-Bera (JB):,57089.304
Skew:,7.628,Prob(JB):,0.0
Kurtosis:,78.32,Cond. No.,9.79


In [134]:
initial_education_data = initial[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
initial_education_data.dropna(inplace=True)
initial_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=initial_education_data).fit(cov_type="cluster", cov_kwds={'groups': initial_education_data['region']})
initial_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.018
Model:,OLS,Adj. R-squared:,0.006
Method:,Least Squares,F-statistic:,1.806
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.246
Time:,21:41:31,Log-Likelihood:,-2523.9
No. Observations:,241,AIC:,5056.0
Df Residuals:,237,BIC:,5070.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.363e+04,2785.806,4.894,0.000,8174.820,1.91e+04
treatment_period,-1408.7588,2880.164,-0.489,0.625,-7053.776,4236.258
treatment_1,-2632.3492,2897.872,-0.908,0.364,-8312.074,3047.375
treatment_period:treatment_1,8309.3105,3643.310,2.281,0.023,1168.554,1.55e+04

0,1,2,3
Omnibus:,140.871,Durbin-Watson:,0.853
Prob(Omnibus):,0.0,Jarque-Bera (JB):,934.593
Skew:,2.299,Prob(JB):,1.14e-203
Kurtosis:,11.481,Cond. No.,8.67


In [135]:
initial_education_data = initial[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
initial_education_data.dropna(inplace=True)
initial_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=initial_education_data).fit(cov_type="HC3")
initial_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.399
Model:,OLS,Adj. R-squared:,0.376
Method:,Least Squares,F-statistic:,33.4
Date:,"Wed, 10 May 2023",Prob (F-statistic):,3.3200000000000002e-37
Time:,21:41:31,Log-Likelihood:,-2464.7
No. Observations:,241,AIC:,4949.0
Df Residuals:,231,BIC:,4984.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,7113.6553,620.358,11.467,0.000,5897.775,8329.536
C(region)[T.Мурманская область],1.294e+04,879.935,14.709,0.000,1.12e+04,1.47e+04
C(region)[T.Новгородская область],-223.4584,807.727,-0.277,0.782,-1806.574,1359.657
C(region)[T.Приморский край],4082.4923,907.272,4.500,0.000,2304.273,5860.712
C(region)[T.Республика Алтай],7549.6999,1245.105,6.064,0.000,5109.339,9990.061
C(region)[T.Тульская область],1965.6612,879.874,2.234,0.025,241.139,3690.183
C(region)[T.Хабаровский край],1.465e+04,2497.186,5.867,0.000,9755.639,1.95e+04
treatment_period,1998.9477,634.627,3.150,0.002,755.102,3242.794
treatment_1,1273.9097,1513.735,0.842,0.400,-1692.957,4240.776

0,1,2,3
Omnibus:,134.613,Durbin-Watson:,1.227
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1349.459
Skew:,1.984,Prob(JB):,9.310000000000001e-294
Kurtosis:,13.893,Cond. No.,9.61


### Models for difference

In [136]:
difference_roads_data = difference[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
difference_roads_data.dropna(inplace=True)
difference_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=difference_roads_data).fit(cov_type="cluster", cov_kwds={'groups': difference_roads_data['region']})
difference_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.001
Model:,OLS,Adj. R-squared:,-0.015
Method:,Least Squares,F-statistic:,0.06955
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.974
Time:,21:41:31,Log-Likelihood:,-1689.6
No. Observations:,200,AIC:,3387.0
Df Residuals:,196,BIC:,3400.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,543.4611,137.520,3.952,0.000,273.927,812.996
treatment_period,17.7144,121.163,0.146,0.884,-219.762,255.190
treatment_1,-84.7111,252.078,-0.336,0.737,-578.775,409.353
treatment_period:treatment_1,223.7690,510.387,0.438,0.661,-776.570,1224.108

0,1,2,3
Omnibus:,151.049,Durbin-Watson:,2.046
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1351.184
Skew:,2.955,Prob(JB):,3.93e-294
Kurtosis:,14.279,Cond. No.,10.1


In [137]:
difference_roads_data = difference[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
difference_roads_data.dropna(inplace=True)
difference_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=difference_roads_data).fit(cov_type="HC3")
difference_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.074
Model:,OLS,Adj. R-squared:,0.03
Method:,Least Squares,F-statistic:,3.327
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.000848
Time:,21:41:32,Log-Likelihood:,-1682.0
No. Observations:,200,AIC:,3384.0
Df Residuals:,190,BIC:,3417.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,967.0651,319.318,3.029,0.002,341.214,1592.917
C(region)[T.Мурманская область],-374.3487,522.472,-0.716,0.474,-1398.375,649.677
C(region)[T.Новгородская область],-834.3966,329.095,-2.535,0.011,-1479.411,-189.382
C(region)[T.Приморский край],-747.5940,304.412,-2.456,0.014,-1344.231,-150.957
C(region)[T.Республика Алтай],-655.7465,346.214,-1.894,0.058,-1334.314,22.821
C(region)[T.Тульская область],75.6792,501.590,0.151,0.880,-907.419,1058.777
C(region)[T.Хабаровский край],-291.8986,369.605,-0.790,0.430,-1016.311,432.514
treatment_period,268.8169,177.875,1.511,0.131,-79.812,617.446
treatment_1,-341.0705,379.296,-0.899,0.369,-1084.477,402.336

0,1,2,3
Omnibus:,149.69,Durbin-Watson:,2.123
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1367.698
Skew:,2.904,Prob(JB):,1.0199999999999999e-297
Kurtosis:,14.419,Cond. No.,11.2


In [138]:
difference_housing_data = difference[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
difference_housing_data.dropna(inplace=True)
difference_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=difference_housing_data).fit(cov_type="cluster", cov_kwds={'groups': difference_housing_data['region']})
difference_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.011
Model:,OLS,Adj. R-squared:,-0.004
Method:,Least Squares,F-statistic:,9.735
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.0101
Time:,21:41:32,Log-Likelihood:,-2069.6
No. Observations:,205,AIC:,4147.0
Df Residuals:,201,BIC:,4160.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1280.2654,756.160,1.693,0.090,-201.781,2762.312
treatment_period,-1398.4295,936.525,-1.493,0.135,-3233.984,437.125
treatment_1,-540.5660,745.197,-0.725,0.468,-2001.124,919.992
treatment_period:treatment_1,673.7979,987.095,0.683,0.495,-1260.873,2608.468

0,1,2,3
Omnibus:,159.667,Durbin-Watson:,2.169
Prob(Omnibus):,0.0,Jarque-Bera (JB):,13753.471
Skew:,-2.13,Prob(JB):,0.0
Kurtosis:,42.9,Cond. No.,10.2


In [139]:
difference_housing_data = difference[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
difference_housing_data.dropna(inplace=True)
difference_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=difference_housing_data).fit(cov_type="HC3")
difference_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.053
Model:,OLS,Adj. R-squared:,0.009
Method:,Least Squares,F-statistic:,3.033
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.00204
Time:,21:41:32,Log-Likelihood:,-2065.2
No. Observations:,205,AIC:,4150.0
Df Residuals:,195,BIC:,4184.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-524.2219,3206.166,-0.164,0.870,-6808.191,5759.747
C(region)[T.Мурманская область],1117.7774,3004.055,0.372,0.710,-4770.063,7005.618
C(region)[T.Новгородская область],774.0363,2981.706,0.260,0.795,-5070.000,6618.073
C(region)[T.Приморский край],287.7609,3032.485,0.095,0.924,-5655.801,6231.323
C(region)[T.Республика Алтай],2638.2771,3241.138,0.814,0.416,-3714.236,8990.790
C(region)[T.Тульская область],1694.6072,3117.507,0.544,0.587,-4415.594,7804.808
C(region)[T.Хабаровский край],3871.2955,3524.675,1.098,0.272,-3036.941,1.08e+04
treatment_period,-122.9851,912.197,-0.135,0.893,-1910.857,1664.887
treatment_1,31.2911,582.895,0.054,0.957,-1111.162,1173.744

0,1,2,3
Omnibus:,166.412,Durbin-Watson:,2.275
Prob(Omnibus):,0.0,Jarque-Bera (JB):,12388.013
Skew:,-2.346,Prob(JB):,0.0
Kurtosis:,40.793,Cond. No.,11.4


In [140]:
difference_education_data = difference[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
difference_education_data.dropna(inplace=True)
difference_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=difference_education_data).fit(cov_type="cluster", cov_kwds={'groups': difference_education_data['region']})
difference_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.034
Model:,OLS,Adj. R-squared:,0.02
Method:,Least Squares,F-statistic:,2.218
Date:,"Wed, 10 May 2023",Prob (F-statistic):,0.187
Time:,21:41:33,Log-Likelihood:,-2087.2
No. Observations:,212,AIC:,4182.0
Df Residuals:,208,BIC:,4196.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3185.1015,1103.713,2.886,0.004,1021.863,5348.340
treatment_period,-1083.1216,742.671,-1.458,0.145,-2538.730,372.487
treatment_1,-877.4350,467.199,-1.878,0.060,-1793.128,38.258
treatment_period:treatment_1,-4284.9623,2600.363,-1.648,0.099,-9381.580,811.655

0,1,2,3
Omnibus:,67.255,Durbin-Watson:,1.252
Prob(Omnibus):,0.0,Jarque-Bera (JB):,153.642
Skew:,1.469,Prob(JB):,4.34e-34
Kurtosis:,5.959,Cond. No.,10.2


In [141]:
difference_education_data = difference[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
difference_education_data.dropna(inplace=True)
difference_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=difference_education_data).fit(cov_type="HC3")
difference_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.313
Model:,OLS,Adj. R-squared:,0.283
Method:,Least Squares,F-statistic:,13.31
Date:,"Wed, 10 May 2023",Prob (F-statistic):,1.21e-16
Time:,21:41:33,Log-Likelihood:,-2051.0
No. Observations:,212,AIC:,4122.0
Df Residuals:,202,BIC:,4156.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,8526.7714,1002.579,8.505,0.000,6561.752,1.05e+04
C(region)[T.Мурманская область],-7728.1857,994.391,-7.772,0.000,-9677.157,-5779.214
C(region)[T.Новгородская область],-6564.5965,1104.117,-5.946,0.000,-8728.626,-4400.567
C(region)[T.Приморский край],-8264.0049,1010.672,-8.177,0.000,-1.02e+04,-6283.124
C(region)[T.Республика Алтай],-3198.9359,1239.623,-2.581,0.010,-5628.553,-769.319
C(region)[T.Тульская область],-7142.3610,1085.829,-6.578,0.000,-9270.547,-5014.175
C(region)[T.Хабаровский край],-4558.2836,1404.462,-3.246,0.001,-7310.979,-1805.589
treatment_period,431.8009,584.572,0.739,0.460,-713.939,1577.540
treatment_1,63.7294,1321.102,0.048,0.962,-2525.582,2653.041

0,1,2,3
Omnibus:,87.75,Durbin-Watson:,1.566
Prob(Omnibus):,0.0,Jarque-Bera (JB):,434.728
Skew:,1.55,Prob(JB):,3.98e-95
Kurtosis:,9.294,Cond. No.,11.3
