In [1]:
import numpy as np
import pandas as pd
import dill as dl
import matplotlib.pyplot as plt
import sys
import pickle
import itertools
import re
import os
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as sm_f

In [2]:
pd.set_option("max_columns", None)
pd.set_option("max_rows", None)

# >= 75 specification

In [3]:
with open("D:/DZ/Course_6/Diploma/Data/two_time_points_75.csv", "rb") as inp:
    two_time_points = pd.read_csv(inp, index_col=0)

In [4]:
treat_1_roads = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_roads"]].copy()
treat_1_roads.dropna(inplace=True)

roads = sm_f.ols(formula="bad_roads ~ treatment_period*treatment_1 + C(region)", data=treat_1_roads).fit(cov_type="HC3")
roads.summary()

0,1,2,3
Dep. Variable:,bad_roads,R-squared:,0.334
Model:,OLS,Adj. R-squared:,0.304
Method:,Least Squares,F-statistic:,10.71
Date:,"Thu, 11 May 2023",Prob (F-statistic):,1.67e-13
Time:,01:31:12,Log-Likelihood:,-932.84
No. Observations:,209,AIC:,1886.0
Df Residuals:,199,BIC:,1919.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,55.6984,3.957,14.076,0.000,47.943,63.454
C(region)[T.Мурманская область],-17.9481,6.273,-2.861,0.004,-30.243,-5.653
C(region)[T.Новгородская область],-1.1629,5.059,-0.230,0.818,-11.078,8.752
C(region)[T.Республика Алтай],23.5637,6.033,3.906,0.000,11.739,35.388
C(region)[T.Республика Марий Эл],19.6830,4.641,4.241,0.000,10.587,28.779
C(region)[T.Тульская область],-5.9577,4.947,-1.204,0.229,-15.654,3.739
C(region)[T.Хабаровский край],-22.6257,6.243,-3.624,0.000,-34.861,-10.390
treatment_period,-1.7168,3.244,-0.529,0.597,-8.074,4.641
treatment_1,10.3676,7.839,1.323,0.186,-4.996,25.731

0,1,2,3
Omnibus:,0.922,Durbin-Watson:,1.437
Prob(Omnibus):,0.631,Jarque-Bera (JB):,0.686
Skew:,-0.13,Prob(JB):,0.71
Kurtosis:,3.106,Cond. No.,10.4


In [5]:
treat_1_roads = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_roads"]].copy()
treat_1_roads.dropna(inplace=True)

roads = sm_f.ols(formula="bad_roads ~ treatment_period*treatment_1", data=treat_1_roads).fit(cov_type="cluster", cov_kwds={'groups': treat_1_roads['region']})
roads.summary()

0,1,2,3
Dep. Variable:,bad_roads,R-squared:,0.013
Model:,OLS,Adj. R-squared:,-0.001
Method:,Least Squares,F-statistic:,5.019
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.0448
Time:,01:31:12,Log-Likelihood:,-974.02
No. Observations:,209,AIC:,1956.0
Df Residuals:,205,BIC:,1969.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,56.7686,6.009,9.448,0.000,44.992,68.546
treatment_period,-3.8980,1.563,-2.494,0.013,-6.962,-0.834
treatment_1,8.7789,6.054,1.450,0.147,-3.086,20.644
treatment_period:treatment_1,-1.5432,2.501,-0.617,0.537,-6.445,3.359

0,1,2,3
Omnibus:,27.043,Durbin-Watson:,0.964
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8.989
Skew:,-0.211,Prob(JB):,0.0112
Kurtosis:,2.076,Cond. No.,9.97


In [6]:
treat_1_schools = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_schools"]].copy()
treat_1_schools.dropna(inplace=True)

schools = sm_f.ols(formula="bad_schools ~ treatment_period*treatment_1", data=treat_1_schools).fit(cov_type="cluster", cov_kwds={'groups': treat_1_schools['region']})
schools.summary()

0,1,2,3
Dep. Variable:,bad_schools,R-squared:,0.057
Model:,OLS,Adj. R-squared:,0.039
Method:,Least Squares,F-statistic:,1.147
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.394
Time:,01:31:26,Log-Likelihood:,-636.19
No. Observations:,158,AIC:,1280.0
Df Residuals:,154,BIC:,1293.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3.3840,1.433,2.361,0.018,0.575,6.193
treatment_period,5.6600,5.369,1.054,0.292,-4.862,16.182
treatment_1,0.1874,1.820,0.103,0.918,-3.380,3.755
treatment_period:treatment_1,6.2400,5.334,1.170,0.242,-4.214,16.694

0,1,2,3
Omnibus:,136.353,Durbin-Watson:,2.117
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1535.614
Skew:,3.211,Prob(JB):,0.0
Kurtosis:,16.857,Cond. No.,8.74


In [7]:
treat_1_schools = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_schools"]].copy()
treat_1_schools.dropna(inplace=True)

schools = sm_f.ols(formula="bad_schools ~ treatment_period*treatment_1 + C(region)", data=treat_1_schools).fit(cov_type="HC3")
schools.summary()

0,1,2,3
Dep. Variable:,bad_schools,R-squared:,0.271
Model:,OLS,Adj. R-squared:,0.221
Method:,Least Squares,F-statistic:,1.132
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.343
Time:,01:31:27,Log-Likelihood:,-615.87
No. Observations:,158,AIC:,1254.0
Df Residuals:,147,BIC:,1287.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,32.0502,20.309,1.578,0.115,-7.755,71.855
C(region)[T.Мурманская область],-36.2906,42.364,-0.857,0.392,-119.323,46.742
C(region)[T.Новгородская область],-28.3150,20.499,-1.381,0.167,-68.492,11.862
C(region)[T.Приморский край],-28.2589,20.614,-1.371,0.170,-68.661,12.143
C(region)[T.Республика Алтай],-27.9886,21.096,-1.327,0.185,-69.337,13.360
C(region)[T.Республика Марий Эл],-34.2772,20.810,-1.647,0.100,-75.063,6.509
C(region)[T.Тульская область],-32.7482,20.532,-1.595,0.111,-72.990,7.493
C(region)[T.Хабаровский край],-10.3835,26.735,-0.388,0.698,-62.784,42.017
treatment_period,7.1404,3.036,2.352,0.019,1.191,13.090

0,1,2,3
Omnibus:,80.207,Durbin-Watson:,2.5
Prob(Omnibus):,0.0,Jarque-Bera (JB):,331.684
Skew:,1.934,Prob(JB):,9.450000000000001e-73
Kurtosis:,8.952,Cond. No.,20.3


In [8]:
treat_1_sewerage = two_time_points[["raion", "region", "treatment_1", "treatment_period", "sewage_grid_fixed"]].copy()
treat_1_sewerage.dropna(inplace=True)

sewerage = sm_f.ols(formula="sewage_grid_fixed ~ treatment_period*treatment_1", data=treat_1_sewerage).fit(cov_type="cluster", cov_kwds={'groups': treat_1_sewerage['region']})
sewerage.summary()

0,1,2,3
Dep. Variable:,sewage_grid_fixed,R-squared:,0.049
Model:,OLS,Adj. R-squared:,0.02
Method:,Least Squares,F-statistic:,2283.0
Date:,"Thu, 11 May 2023",Prob (F-statistic):,1.47e-09
Time:,01:31:27,Log-Likelihood:,160.9
No. Observations:,103,AIC:,-313.8
Df Residuals:,99,BIC:,-303.3
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0161,0.005,3.509,0.000,0.007,0.025
treatment_period,0.0055,0.011,0.498,0.618,-0.016,0.027
treatment_1,0.0839,0.005,18.239,0.000,0.075,0.093
treatment_period:treatment_1,-0.0853,0.012,-7.009,0.000,-0.109,-0.061

0,1,2,3
Omnibus:,149.262,Durbin-Watson:,2.241
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5458.469
Skew:,5.287,Prob(JB):,0.0
Kurtosis:,37.06,Cond. No.,13.1


In [9]:
treat_1_sewerage = two_time_points[["raion", "region", "treatment_1", "treatment_period", "sewage_grid_fixed"]].copy()
treat_1_sewerage.dropna(inplace=True)

sewerage = sm_f.ols(formula="sewage_grid_fixed ~ treatment_period*treatment_1 + C(region)", data=treat_1_sewerage).fit(cov_type="HC3")
sewerage.summary()

0,1,2,3
Dep. Variable:,sewage_grid_fixed,R-squared:,0.069
Model:,OLS,Adj. R-squared:,-0.021
Method:,Least Squares,F-statistic:,0.39
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.937
Time:,01:31:28,Log-Likelihood:,162.02
No. Observations:,103,AIC:,-304.0
Df Residuals:,93,BIC:,-277.7
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0099,0.008,1.179,0.238,-0.007,0.026
C(region)[T.Мурманская область],0.0204,0.030,0.678,0.498,-0.039,0.079
C(region)[T.Новгородская область],-5.537e-05,0.008,-0.007,0.995,-0.017,0.017
C(region)[T.Приморский край],0.0076,0.014,0.545,0.586,-0.020,0.035
C(region)[T.Республика Алтай],-0.0075,0.048,-0.157,0.875,-0.101,0.086
C(region)[T.Тульская область],-0.0064,0.010,-0.662,0.508,-0.025,0.013
C(region)[T.Хабаровский край],0.0071,0.009,0.795,0.427,-0.010,0.025
treatment_period,0.0068,0.011,0.614,0.539,-0.015,0.028
treatment_1,0.0826,0.142,0.580,0.562,-0.196,0.361

0,1,2,3
Omnibus:,141.998,Durbin-Watson:,2.219
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4426.033
Skew:,4.929,Prob(JB):,0.0
Kurtosis:,33.564,Cond. No.,14.5


In [10]:
treat_1_heat = two_time_points[["raion", "region", "treatment_1", "treatment_period", "heat_grid_fixed"]].copy()
treat_1_heat.dropna(inplace=True)

heat = sm_f.ols(formula="heat_grid_fixed ~ treatment_period*treatment_1", data=treat_1_heat).fit(cov_type="HC3") # т.к. в выборке только один регион
heat.summary()

0,1,2,3
Dep. Variable:,heat_grid_fixed,R-squared:,0.027
Model:,OLS,Adj. R-squared:,-0.033
Method:,Least Squares,F-statistic:,0.3548
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.786
Time:,01:31:28,Log-Likelihood:,126.45
No. Observations:,53,AIC:,-244.9
Df Residuals:,49,BIC:,-237.0
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0240,0.003,7.359,0.000,0.018,0.030
treatment_period,0.0070,0.007,0.965,0.334,-0.007,0.021
treatment_1,0.0077,0.030,0.256,0.798,-0.052,0.067
treatment_period:treatment_1,-0.0047,0.042,-0.111,0.911,-0.087,0.078

0,1,2,3
Omnibus:,39.019,Durbin-Watson:,1.722
Prob(Omnibus):,0.0,Jarque-Bera (JB):,101.892
Skew:,2.187,Prob(JB):,7.489999999999999e-23
Kurtosis:,8.197,Cond. No.,9.67


In [11]:
treat_1_heat = two_time_points[["raion", "region", "treatment_1", "treatment_period", "heat_grid_fixed"]].copy()
treat_1_heat.dropna(inplace=True)

heat = sm_f.ols(formula="heat_grid_fixed ~ treatment_period*treatment_1 + C(region)", data=treat_1_heat).fit(cov_type="HC3") # т.к. в выборке только один регион
heat.summary()

0,1,2,3
Dep. Variable:,heat_grid_fixed,R-squared:,0.027
Model:,OLS,Adj. R-squared:,-0.033
Method:,Least Squares,F-statistic:,0.3548
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.786
Time:,01:31:29,Log-Likelihood:,126.45
No. Observations:,53,AIC:,-244.9
Df Residuals:,49,BIC:,-237.0
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0240,0.003,7.359,0.000,0.018,0.030
treatment_period,0.0070,0.007,0.965,0.334,-0.007,0.021
treatment_1,0.0077,0.030,0.256,0.798,-0.052,0.067
treatment_period:treatment_1,-0.0047,0.042,-0.111,0.911,-0.087,0.078

0,1,2,3
Omnibus:,39.019,Durbin-Watson:,1.722
Prob(Omnibus):,0.0,Jarque-Bera (JB):,101.892
Skew:,2.187,Prob(JB):,7.489999999999999e-23
Kurtosis:,8.197,Cond. No.,9.67


In [12]:
treat_1_water = two_time_points[["raion", "region", "treatment_1", "treatment_period", "water_grid_fixed"]].copy()
treat_1_water.dropna(inplace=True)

water = sm_f.ols(formula="water_grid_fixed ~ treatment_period*treatment_1", data=treat_1_water).fit(cov_type="cluster", cov_kwds={'groups': treat_1_water['region']})
water.summary()

0,1,2,3
Dep. Variable:,water_grid_fixed,R-squared:,0.007
Model:,OLS,Adj. R-squared:,-0.012
Method:,Least Squares,F-statistic:,13.83
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.00742
Time:,01:31:29,Log-Likelihood:,245.9
No. Observations:,167,AIC:,-483.8
Df Residuals:,163,BIC:,-471.3
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0267,0.011,2.426,0.015,0.005,0.048
treatment_period,-0.0080,0.010,-0.783,0.434,-0.028,0.012
treatment_1,-0.0029,0.018,-0.155,0.877,-0.039,0.033
treatment_period:treatment_1,-0.0074,0.015,-0.490,0.624,-0.037,0.022

0,1,2,3
Omnibus:,323.944,Durbin-Watson:,1.897
Prob(Omnibus):,0.0,Jarque-Bera (JB):,95005.395
Skew:,10.075,Prob(JB):,0.0
Kurtosis:,118.098,Cond. No.,12.1


In [13]:
treat_1_water = two_time_points[["raion", "region", "treatment_1", "treatment_period", "water_grid_fixed"]].copy()
treat_1_water.dropna(inplace=True)

water = sm_f.ols(formula="water_grid_fixed ~ treatment_period*treatment_1 + C(region)", data=treat_1_water).fit(cov_type="HC3")
water.summary()

0,1,2,3
Dep. Variable:,water_grid_fixed,R-squared:,0.071
Model:,OLS,Adj. R-squared:,0.024
Method:,Least Squares,F-statistic:,3.951
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.000275
Time:,01:31:30,Log-Likelihood:,251.54
No. Observations:,167,AIC:,-485.1
Df Residuals:,158,BIC:,-457.0
Df Model:,8,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0176,0.005,3.221,0.001,0.007,0.028
C(region)[T.Мурманская область],0.0401,0.034,1.197,0.231,-0.026,0.106
C(region)[T.Новгородская область],-0.0052,0.005,-1.079,0.281,-0.015,0.004
C(region)[T.Приморский край],0.0042,0.005,0.843,0.399,-0.006,0.014
C(region)[T.Республика Алтай],0.0041,0.007,0.583,0.560,-0.010,0.018
C(region)[T.Хабаровский край],0.0198,0.007,2.765,0.006,0.006,0.034
treatment_period,-0.0076,0.009,-0.833,0.405,-0.025,0.010
treatment_1,0.0041,0.015,0.272,0.786,-0.026,0.034
treatment_period:treatment_1,-0.0068,0.017,-0.391,0.696,-0.041,0.027

0,1,2,3
Omnibus:,315.811,Durbin-Watson:,2.01
Prob(Omnibus):,0.0,Jarque-Bera (JB):,84350.407
Skew:,9.592,Prob(JB):,0.0
Kurtosis:,111.417,Cond. No.,12.9


In [14]:
with open("D:/DZ/Course_6/Diploma/Data/financial_data.csv", "rb") as inp:
    temp_fin = pickle.load(inp)

In [17]:
with open("D:/DZ/Course_6/Diploma/Data/mun_data_75.csv", "rb") as inp:
    data_for_models = pd.read_csv(inp, index_col=0).reset_index(drop=True)

In [18]:
data_for_models.head()

Unnamed: 0,raion,region,treatment_1,treatment_2
0,Владивостокский,Приморский край,0,1
1,Хасанский район,Приморский край,0,1
2,Артемовский,Приморский край,0,0
3,Шкотовский район,Приморский край,0,0
4,Надеждинский район,Приморский край,0,0


In [19]:
mapping = {"Приморский край": 2016, "Мурманская область": 2016, "Костромская область": 2015, "Республика Алтай": 2014, "Республика Марий Эл": 2014, "Хабаровский край": 2014, 
           "Тульская область": 2014, "Новгородская область": 2016}

In [20]:
actual, initial, difference = temp_fin.loc[temp_fin.budget_type == "actual_spending"], temp_fin.loc[temp_fin.budget_type == "initial_budget"], temp_fin.loc[temp_fin.budget_type == "разница"]

In [21]:
actual = pd.pivot(actual, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()
initial = pd.pivot(initial, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()
difference = pd.pivot(difference, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()

In [22]:
for i in actual, initial, difference:
    i.rename(columns={"дорожное хозяйство (дорожные фонды)": "roads", "жилищно-коммунальное хозяйство": "housing", "образование": "education"}, inplace=True)

In [23]:
actual.head()

sub_indicator,region,raion,year,roads,housing,education
0,Костромская область,Антроповский район,2013,666.115702,1610.368144,12882.945154
1,Костромская область,Антроповский район,2014,603.232937,1922.79625,12638.11448
2,Костромская область,Антроповский район,2015,688.90871,95.255228,21901.154847
3,Костромская область,Антроповский район,2016,764.043811,203.425719,10986.12319
4,Костромская область,Антроповский район,2017,,,


In [24]:
frames = [actual, initial, difference]

In [25]:
for i in range(len(frames)):
    temp_fin_data = pd.DataFrame()
    for j in mapping.keys():
        temp_fin_data = temp_fin_data.append(frames[i].loc[(frames[i].region == j) & ((frames[i].year == mapping[j]) | (frames[i].year == mapping[j] + 5))])
    temp_fin_data["treatment_period"] = [1 if i > 2018 else 0 for i in temp_fin_data.year]
    frames[i] = pd.merge(temp_fin_data, data_for_models, on=["region", "raion"], how="inner")

In [24]:
#frames[0]

In [26]:
actual, initial, difference = frames

In [27]:
actual.head()

Unnamed: 0,region,raion,year,roads,housing,education,treatment_period,treatment_1,treatment_2
0,Приморский край,Анучинский район,2016,1915.738966,233.152838,11025.1236,0,0,1
1,Приморский край,Анучинский район,2021,,,,1,0,1
2,Приморский край,Арсеньевский,2016,928.610553,1899.621063,9445.185699,0,0,0
3,Приморский край,Арсеньевский,2021,726.384117,3508.931747,9828.53369,1,0,0
4,Приморский край,Артемовский,2016,59.130107,1628.357748,8157.6816,0,0,0


### Models for actual budgets

In [28]:
actual_roads_data = actual[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
actual_roads_data.dropna(inplace=True)
actual_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=actual_roads_data).fit(cov_type="cluster", cov_kwds={'groups': actual_roads_data['region']})
actual_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.041
Model:,OLS,Adj. R-squared:,0.03
Method:,Least Squares,F-statistic:,13.98
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.00243
Time:,01:32:00,Log-Likelihood:,-2360.5
No. Observations:,271,AIC:,4729.0
Df Residuals:,267,BIC:,4743.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1091.8190,243.043,4.492,0.000,615.463,1568.175
treatment_period,531.9821,277.165,1.919,0.055,-11.251,1075.215
treatment_1,-583.5200,250.695,-2.328,0.020,-1074.873,-92.167
treatment_period:treatment_1,-45.5314,297.371,-0.153,0.878,-628.367,537.305

0,1,2,3
Omnibus:,185.212,Durbin-Watson:,1.615
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1855.992
Skew:,2.694,Prob(JB):,0.0
Kurtosis:,14.633,Cond. No.,10.8


In [29]:
actual_roads_data = actual[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
actual_roads_data.dropna(inplace=True)
actual_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=actual_roads_data).fit(cov_type="HC3")
actual_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.144
Model:,OLS,Adj. R-squared:,0.111
Method:,Least Squares,F-statistic:,5.682
Date:,"Thu, 11 May 2023",Prob (F-statistic):,1.06e-07
Time:,01:32:01,Log-Likelihood:,-2345.0
No. Observations:,271,AIC:,4712.0
Df Residuals:,260,BIC:,4752.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1487.3051,323.071,4.604,0.000,854.098,2120.512
C(region)[T.Мурманская область],450.2682,553.067,0.814,0.416,-633.724,1534.260
C(region)[T.Новгородская область],-1067.5243,367.369,-2.906,0.004,-1787.554,-347.495
C(region)[T.Приморский край],-702.3622,347.474,-2.021,0.043,-1383.399,-21.325
C(region)[T.Республика Алтай],447.9722,506.947,0.884,0.377,-545.626,1441.570
C(region)[T.Республика Марий Эл],-709.6885,380.164,-1.867,0.062,-1454.797,35.420
C(region)[T.Тульская область],-98.2144,425.838,-0.231,0.818,-932.841,736.412
C(region)[T.Хабаровский край],-298.6572,437.247,-0.683,0.495,-1155.646,558.332
treatment_period,485.6161,194.665,2.495,0.013,104.079,867.153

0,1,2,3
Omnibus:,185.768,Durbin-Watson:,1.796
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2068.353
Skew:,2.655,Prob(JB):,0.0
Kurtosis:,15.449,Cond. No.,11.3


In [30]:
actual_housing_data = actual[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
actual_housing_data.dropna(inplace=True)
actual_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=actual_housing_data).fit(cov_type="cluster", cov_kwds={'groups': actual_housing_data['region']})
actual_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.012
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,1.249
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.362
Time:,01:32:02,Log-Likelihood:,-2913.4
No. Observations:,270,AIC:,5835.0
Df Residuals:,266,BIC:,5849.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3329.4550,1849.916,1.800,0.072,-296.313,6955.223
treatment_period,2164.0062,1496.564,1.446,0.148,-769.205,5097.217
treatment_1,-2365.7899,1801.231,-1.313,0.189,-5896.138,1164.558
treatment_period:treatment_1,-1835.7719,1515.389,-1.211,0.226,-4805.880,1134.336

0,1,2,3
Omnibus:,381.991,Durbin-Watson:,1.129
Prob(Omnibus):,0.0,Jarque-Bera (JB):,34279.239
Skew:,6.849,Prob(JB):,0.0
Kurtosis:,56.474,Cond. No.,10.8


In [31]:
actual_housing_data = actual[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
actual_housing_data.dropna(inplace=True)
actual_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=actual_housing_data).fit(cov_type="HC3")
actual_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.256
Model:,OLS,Adj. R-squared:,0.227
Method:,Least Squares,F-statistic:,6.037
Date:,"Thu, 11 May 2023",Prob (F-statistic):,3.05e-08
Time:,01:32:02,Log-Likelihood:,-2875.1
No. Observations:,270,AIC:,5772.0
Df Residuals:,259,BIC:,5812.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1146.6467,764.266,1.500,0.134,-351.288,2644.581
C(region)[T.Мурманская область],1728.9052,879.946,1.965,0.049,4.242,3453.568
C(region)[T.Новгородская область],-1373.9298,483.748,-2.840,0.005,-2322.059,-425.800
C(region)[T.Приморский край],-430.0643,441.856,-0.973,0.330,-1296.087,435.958
C(region)[T.Республика Алтай],683.0098,658.835,1.037,0.300,-608.283,1974.302
C(region)[T.Республика Марий Эл],-618.2154,468.774,-1.319,0.187,-1536.995,300.564
C(region)[T.Тульская область],847.0468,676.406,1.252,0.210,-478.684,2172.778
C(region)[T.Хабаровский край],1.657e+04,4455.394,3.719,0.000,7836.922,2.53e+04
treatment_period,1812.9681,1376.164,1.317,0.188,-884.265,4510.201

0,1,2,3
Omnibus:,359.97,Durbin-Watson:,1.51
Prob(Omnibus):,0.0,Jarque-Bera (JB):,31228.153
Skew:,6.126,Prob(JB):,0.0
Kurtosis:,54.242,Cond. No.,11.4


In [32]:
actual_education_data = actual[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
actual_education_data.dropna(inplace=True)
actual_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=actual_education_data).fit(cov_type="cluster", cov_kwds={'groups': actual_education_data['region']})
actual_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.005
Model:,OLS,Adj. R-squared:,-0.006
Method:,Least Squares,F-statistic:,0.2626
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.85
Time:,01:32:03,Log-Likelihood:,-2882.7
No. Observations:,273,AIC:,5773.0
Df Residuals:,269,BIC:,5788.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.471e+04,2533.937,5.806,0.000,9746.246,1.97e+04
treatment_period,1234.2998,1595.694,0.774,0.439,-1893.202,4361.802
treatment_1,338.3306,2737.057,0.124,0.902,-5026.202,5702.863
treatment_period:treatment_1,387.0135,3155.005,0.123,0.902,-5796.683,6570.710

0,1,2,3
Omnibus:,170.0,Durbin-Watson:,0.763
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1244.633
Skew:,2.518,Prob(JB):,5.390000000000001e-271
Kurtosis:,12.169,Cond. No.,10.8


In [33]:
actual_education_data = actual[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
actual_education_data.dropna(inplace=True)
actual_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=actual_education_data).fit(cov_type="HC3")
actual_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.468
Model:,OLS,Adj. R-squared:,0.447
Method:,Least Squares,F-statistic:,34.44
Date:,"Thu, 11 May 2023",Prob (F-statistic):,2.5399999999999998e-42
Time:,01:32:03,Log-Likelihood:,-2797.3
No. Observations:,273,AIC:,5617.0
Df Residuals:,262,BIC:,5656.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.442e+04,1036.710,13.911,0.000,1.24e+04,1.65e+04
C(region)[T.Мурманская область],8558.4113,1432.930,5.973,0.000,5749.921,1.14e+04
C(region)[T.Новгородская область],-5508.8594,967.119,-5.696,0.000,-7404.377,-3613.342
C(region)[T.Приморский край],-2914.8095,1113.167,-2.618,0.009,-5096.576,-733.043
C(region)[T.Республика Алтай],8090.2271,1866.966,4.333,0.000,4431.041,1.17e+04
C(region)[T.Республика Марий Эл],-5585.7837,1008.948,-5.536,0.000,-7563.285,-3608.282
C(region)[T.Тульская область],-4077.5717,1066.127,-3.825,0.000,-6167.143,-1988.000
C(region)[T.Хабаровский край],1.192e+04,2687.273,4.437,0.000,6657.346,1.72e+04
treatment_period,516.3744,898.380,0.575,0.565,-1244.419,2277.168

0,1,2,3
Omnibus:,165.048,Durbin-Watson:,1.372
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1728.167
Skew:,2.253,Prob(JB):,0.0
Kurtosis:,14.472,Cond. No.,11.4


### Models for initial budgets

In [34]:
initial_roads_data = initial[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
initial_roads_data.dropna(inplace=True)
initial_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=initial_roads_data).fit(cov_type="cluster", cov_kwds={'groups': initial_roads_data['region']})
initial_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.058
Model:,OLS,Adj. R-squared:,0.045
Method:,Least Squares,F-statistic:,10.14
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.00916
Time:,01:32:05,Log-Likelihood:,-1836.5
No. Observations:,215,AIC:,3681.0
Df Residuals:,211,BIC:,3694.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,487.1776,109.418,4.452,0.000,272.723,701.632
treatment_period,626.1800,206.629,3.030,0.002,221.195,1031.165
treatment_1,-6.0083,261.302,-0.023,0.982,-518.150,506.133
treatment_period:treatment_1,-179.5430,292.619,-0.614,0.539,-753.067,393.981

0,1,2,3
Omnibus:,251.872,Durbin-Watson:,1.861
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10824.056
Skew:,4.853,Prob(JB):,0.0
Kurtosis:,36.377,Cond. No.,11.3


In [35]:
initial_roads_data = initial[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
initial_roads_data.dropna(inplace=True)
initial_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=initial_roads_data).fit(cov_type="HC3")
initial_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.11
Model:,OLS,Adj. R-squared:,0.071
Method:,Least Squares,F-statistic:,3.506
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.000467
Time:,01:32:05,Log-Likelihood:,-1830.4
No. Observations:,215,AIC:,3681.0
Df Residuals:,205,BIC:,3715.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,634.2470,241.787,2.623,0.009,160.353,1108.141
C(region)[T.Мурманская область],454.8246,424.683,1.071,0.284,-377.538,1287.187
C(region)[T.Новгородская область],-530.2360,307.255,-1.726,0.084,-1132.445,71.973
C(region)[T.Приморский край],-17.9921,330.986,-0.054,0.957,-666.713,630.729
C(region)[T.Республика Алтай],-713.2216,383.931,-1.858,0.063,-1465.712,39.269
C(region)[T.Тульская область],-103.5942,337.745,-0.307,0.759,-765.561,558.373
C(region)[T.Хабаровский край],-300.1400,327.873,-0.915,0.360,-942.759,342.480
treatment_period,660.9600,190.260,3.474,0.001,288.057,1033.863
treatment_1,-112.5489,383.351,-0.294,0.769,-863.903,638.806

0,1,2,3
Omnibus:,256.267,Durbin-Watson:,1.971
Prob(Omnibus):,0.0,Jarque-Bera (JB):,11754.244
Skew:,4.968,Prob(JB):,0.0
Kurtosis:,37.833,Cond. No.,11.9


In [36]:
initial_housing_data = initial[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
initial_housing_data.dropna(inplace=True)
initial_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=initial_housing_data).fit(cov_type="cluster", cov_kwds={'groups': initial_housing_data['region']})
initial_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.006
Model:,OLS,Adj. R-squared:,-0.007
Method:,Least Squares,F-statistic:,3.235
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.103
Time:,01:32:06,Log-Likelihood:,-2383.4
No. Observations:,224,AIC:,4775.0
Df Residuals:,220,BIC:,4789.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,2837.8508,1796.046,1.580,0.114,-682.334,6358.036
treatment_period,753.8328,1170.576,0.644,0.520,-1540.454,3048.119
treatment_1,-2744.2481,1797.736,-1.527,0.127,-6267.746,779.250
treatment_period:treatment_1,-634.4993,1124.519,-0.564,0.573,-2838.516,1569.517

0,1,2,3
Omnibus:,286.656,Durbin-Watson:,1.063
Prob(Omnibus):,0.0,Jarque-Bera (JB):,13124.966
Skew:,5.728,Prob(JB):,0.0
Kurtosis:,38.707,Cond. No.,12.2


In [37]:
initial_housing_data = initial[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
initial_housing_data.dropna(inplace=True)
initial_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=initial_housing_data).fit(cov_type="HC3")
initial_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.213
Model:,OLS,Adj. R-squared:,0.18
Method:,Least Squares,F-statistic:,9.919
Date:,"Thu, 11 May 2023",Prob (F-statistic):,1.1e-12
Time:,01:32:07,Log-Likelihood:,-2357.4
No. Observations:,224,AIC:,4735.0
Df Residuals:,214,BIC:,4769.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1310.1849,1876.451,0.698,0.485,-2367.592,4987.962
C(region)[T.Мурманская область],512.3113,1465.813,0.350,0.727,-2360.630,3385.253
C(region)[T.Новгородская область],-1564.5780,1465.455,-1.068,0.286,-4436.818,1307.662
C(region)[T.Приморский край],-748.2712,1440.344,-0.520,0.603,-3571.293,2074.751
C(region)[T.Республика Алтай],-773.5018,1359.041,-0.569,0.569,-3437.173,1890.170
C(region)[T.Тульская область],-604.0738,1313.439,-0.460,0.646,-3178.367,1970.219
C(region)[T.Хабаровский край],1.189e+04,3941.363,3.017,0.003,4166.636,1.96e+04
treatment_period,795.2004,1395.387,0.570,0.569,-1939.708,3530.109
treatment_1,-675.6442,931.043,-0.726,0.468,-2500.455,1149.166

0,1,2,3
Omnibus:,261.137,Durbin-Watson:,1.35
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10249.641
Skew:,4.924,Prob(JB):,0.0
Kurtosis:,34.642,Cond. No.,12.9


In [38]:
initial_education_data = initial[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
initial_education_data.dropna(inplace=True)
initial_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=initial_education_data).fit(cov_type="cluster", cov_kwds={'groups': initial_education_data['region']})
initial_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.01
Model:,OLS,Adj. R-squared:,-0.003
Method:,Least Squares,F-statistic:,1.574
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.291
Time:,01:32:07,Log-Likelihood:,-2432.3
No. Observations:,231,AIC:,4873.0
Df Residuals:,227,BIC:,4886.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.279e+04,2666.773,4.797,0.000,7564.514,1.8e+04
treatment_period,1536.8007,945.736,1.625,0.104,-316.809,3390.410
treatment_1,-2300.9705,4043.888,-0.569,0.569,-1.02e+04,5624.903
treatment_period:treatment_1,666.4235,1356.405,0.491,0.623,-1992.081,3324.928

0,1,2,3
Omnibus:,126.147,Durbin-Watson:,0.719
Prob(Omnibus):,0.0,Jarque-Bera (JB):,706.593
Skew:,2.164,Prob(JB):,3.67e-154
Kurtosis:,10.395,Cond. No.,11.5


In [39]:
initial_education_data = initial[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
initial_education_data.dropna(inplace=True)
initial_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=initial_education_data).fit(cov_type="HC3")
initial_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.445
Model:,OLS,Adj. R-squared:,0.422
Method:,Least Squares,F-statistic:,33.44
Date:,"Thu, 11 May 2023",Prob (F-statistic):,1.12e-36
Time:,01:32:08,Log-Likelihood:,-2365.5
No. Observations:,231,AIC:,4751.0
Df Residuals:,221,BIC:,4785.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,7755.5197,750.222,10.338,0.000,6285.111,9225.929
C(region)[T.Мурманская область],1.348e+04,982.657,13.719,0.000,1.16e+04,1.54e+04
C(region)[T.Новгородская область],-1184.7567,762.167,-1.554,0.120,-2678.577,309.063
C(region)[T.Приморский край],3099.8563,865.704,3.581,0.000,1403.107,4796.605
C(region)[T.Республика Алтай],9963.8777,1723.382,5.782,0.000,6586.111,1.33e+04
C(region)[T.Тульская область],753.7029,977.766,0.771,0.441,-1162.684,2670.090
C(region)[T.Хабаровский край],1.454e+04,2543.966,5.716,0.000,9555.336,1.95e+04
treatment_period,1362.9495,960.605,1.419,0.156,-519.801,3245.700
treatment_1,1450.2836,2978.341,0.487,0.626,-4387.158,7287.725

0,1,2,3
Omnibus:,116.043,Durbin-Watson:,1.24
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1092.784
Skew:,1.725,Prob(JB):,5.07e-238
Kurtosis:,13.081,Cond. No.,12.1


### Models for difference

In [40]:
difference_roads_data = difference[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
difference_roads_data.dropna(inplace=True)
difference_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=difference_roads_data).fit(cov_type="cluster", cov_kwds={'groups': difference_roads_data['region']})
difference_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.006
Model:,OLS,Adj. R-squared:,-0.009
Method:,Least Squares,F-statistic:,5.443
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.0379
Time:,01:32:22,Log-Likelihood:,-1751.5
No. Observations:,206,AIC:,3511.0
Df Residuals:,202,BIC:,3524.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,500.2389,179.948,2.780,0.005,147.548,852.930
treatment_period,25.1417,142.193,0.177,0.860,-253.552,303.836
treatment_1,-350.7175,441.567,-0.794,0.427,-1216.172,514.737
treatment_period:treatment_1,-64.7339,455.793,-0.142,0.887,-958.072,828.604

0,1,2,3
Omnibus:,69.625,Durbin-Watson:,1.936
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2542.263
Skew:,-0.412,Prob(JB):,0.0
Kurtosis:,20.19,Cond. No.,11.3


In [41]:
difference_roads_data = difference[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
difference_roads_data.dropna(inplace=True)
difference_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=difference_roads_data).fit(cov_type="HC3")
difference_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.086
Model:,OLS,Adj. R-squared:,0.045
Method:,Least Squares,F-statistic:,3.023
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.0021
Time:,01:32:23,Log-Likelihood:,-1742.8
No. Observations:,206,AIC:,3506.0
Df Residuals:,196,BIC:,3539.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,914.5471,263.329,3.473,0.001,398.431,1430.663
C(region)[T.Мурманская область],-177.0609,372.498,-0.475,0.635,-907.143,553.021
C(region)[T.Новгородская область],-673.4745,251.793,-2.675,0.007,-1166.980,-179.969
C(region)[T.Приморский край],-770.1456,292.694,-2.631,0.009,-1343.815,-196.477
C(region)[T.Республика Алтай],108.0594,296.441,0.365,0.715,-472.955,689.074
C(region)[T.Тульская область],77.9422,379.748,0.205,0.837,-666.350,822.234
C(region)[T.Хабаровский край],-163.5555,314.430,-0.520,0.603,-779.827,452.716
treatment_period,-93.9857,190.474,-0.493,0.622,-467.309,279.337
treatment_1,-534.2911,439.797,-1.215,0.224,-1396.277,327.695

0,1,2,3
Omnibus:,70.183,Durbin-Watson:,2.097
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2211.327
Skew:,-0.503,Prob(JB):,0.0
Kurtosis:,19.019,Cond. No.,12.0


In [42]:
difference_housing_data = difference[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
difference_housing_data.dropna(inplace=True)
difference_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=difference_housing_data).fit(cov_type="cluster", cov_kwds={'groups': difference_housing_data['region']})
difference_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.019
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,9.89
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.00973
Time:,01:32:23,Log-Likelihood:,-2145.0
No. Observations:,213,AIC:,4298.0
Df Residuals:,209,BIC:,4311.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,786.0978,628.389,1.251,0.211,-445.522,2017.718
treatment_period,1586.4996,766.621,2.069,0.039,83.950,3089.050
treatment_1,-71.9388,682.244,-0.105,0.916,-1409.112,1265.235
treatment_period:treatment_1,-1387.4163,855.747,-1.621,0.105,-3064.649,289.816

0,1,2,3
Omnibus:,88.061,Durbin-Watson:,2.075
Prob(Omnibus):,0.0,Jarque-Bera (JB):,12080.534
Skew:,0.13,Prob(JB):,0.0
Kurtosis:,39.893,Cond. No.,12.2


In [43]:
difference_housing_data = difference[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
difference_housing_data.dropna(inplace=True)
difference_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=difference_housing_data).fit(cov_type="HC3")
difference_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.019
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,2.13
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.0975
Time:,01:32:24,Log-Likelihood:,-2145.0
No. Observations:,213,AIC:,4298.0
Df Residuals:,209,BIC:,4311.0
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,786.0978,615.394,1.277,0.201,-420.052,1992.248
treatment_period,1586.4996,835.926,1.898,0.058,-51.885,3224.884
treatment_1,-71.9388,748.613,-0.096,0.923,-1539.193,1395.315
treatment_period:treatment_1,-1387.4163,1007.970,-1.376,0.169,-3363.002,588.170

0,1,2,3
Omnibus:,88.061,Durbin-Watson:,2.075
Prob(Omnibus):,0.0,Jarque-Bera (JB):,12080.534
Skew:,0.13,Prob(JB):,0.0
Kurtosis:,39.893,Cond. No.,12.2


In [44]:
difference_education_data = difference[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
difference_education_data.dropna(inplace=True)
difference_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=difference_education_data).fit(cov_type="cluster", cov_kwds={'groups': difference_education_data['region']})
difference_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.01
Model:,OLS,Adj. R-squared:,-0.004
Method:,Least Squares,F-statistic:,2.265
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.181
Time:,01:32:25,Log-Likelihood:,-2150.3
No. Observations:,219,AIC:,4309.0
Df Residuals:,215,BIC:,4322.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,2711.2271,1337.031,2.028,0.043,90.695,5331.759
treatment_period,-340.9246,1371.314,-0.249,0.804,-3028.651,2346.802
treatment_1,1959.3465,1921.988,1.019,0.308,-1807.680,5726.373
treatment_period:treatment_1,-284.4121,1783.275,-0.159,0.873,-3779.568,3210.743

0,1,2,3
Omnibus:,102.2,Durbin-Watson:,1.518
Prob(Omnibus):,0.0,Jarque-Bera (JB):,436.735
Skew:,1.878,Prob(JB):,1.4599999999999998e-95
Kurtosis:,8.81,Cond. No.,11.6


In [45]:
difference_education_data = difference[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
difference_education_data.dropna(inplace=True)
difference_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=difference_education_data).fit(cov_type="HC3")
difference_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.189
Model:,OLS,Adj. R-squared:,0.154
Method:,Least Squares,F-statistic:,6.298
Date:,"Thu, 11 May 2023",Prob (F-statistic):,7.07e-08
Time:,01:32:25,Log-Likelihood:,-2128.5
No. Observations:,219,AIC:,4277.0
Df Residuals:,209,BIC:,4311.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,5755.5531,1029.282,5.592,0.000,3738.198,7772.908
C(region)[T.Мурманская область],-4131.1786,1018.718,-4.055,0.000,-6127.830,-2134.528
C(region)[T.Новгородская область],-3549.6537,1086.546,-3.267,0.001,-5679.244,-1420.064
C(region)[T.Приморский край],-5284.9668,1029.914,-5.131,0.000,-7303.560,-3266.373
C(region)[T.Республика Алтай],-612.4978,1352.981,-0.453,0.651,-3264.291,2039.295
C(region)[T.Тульская область],-4109.3242,1062.011,-3.869,0.000,-6190.828,-2027.821
C(region)[T.Хабаровский край],-1810.9018,1443.388,-1.255,0.210,-4639.890,1018.086
treatment_period,-544.2600,586.417,-0.928,0.353,-1693.617,605.096
treatment_1,2046.4507,2274.438,0.900,0.368,-2411.366,6504.268

0,1,2,3
Omnibus:,98.321,Durbin-Watson:,1.826
Prob(Omnibus):,0.0,Jarque-Bera (JB):,536.148
Skew:,1.681,Prob(JB):,3.7800000000000005e-117
Kurtosis:,9.889,Cond. No.,12.2


# Lagged models

# t+1

In [46]:
with open("D:/DZ/Course_6/Diploma/Data/full_ind_data.csv", "rb") as inp:
    full_ind_data = pickle.load(inp)

In [47]:
with open("D:/DZ/Course_6/Diploma/Data/mun_data_75.csv", "rb") as inp:
    mun_data = pd.read_csv(inp, index_col=0)

In [48]:
mun_data.reset_index(inplace=True, drop=True)

In [49]:
full_ind_data.shape

(1386, 14)

In [50]:
full_ind_data.head()

Unnamed: 0,raion,region,treatment_1,treatment_2,year,bad_roads,bad_schools,unfinished_construction,housing,population,lack_transportation,heat_grid_fixed,water_grid_fixed,sewage_grid_fixed
0,Владивостокский,Приморский край,1,1,2013,,,3400.0,4.05,625868.0,,0.05024,0.00447,0.001211
1,Владивостокский,Приморский край,1,1,2014,,,3400.0,3.88,630027.0,,0.02067,0.008552,0.004417
2,Владивостокский,Приморский край,1,1,2015,,0.0,21529.93,5.0,631387.0,,0.019753,0.009448,0.003201
3,Владивостокский,Приморский край,1,1,2016,,0.0,2411.41,5.17,633167.0,,0.011566,0.006601,0.001074
4,Владивостокский,Приморский край,1,1,2017,,0.0,,3.19,633414.0,,0.013158,0.003128,0.003201


In [51]:
full_in_data = pd.merge(full_ind_data, mun_data, on=["raion", "region"], how="left")

In [52]:
full_ind_data = full_in_data.drop(columns=["treatment_1_x", "treatment_2_x"]).rename(columns={"treatment_1_y": "treatment_1", "treatment_2_y": "treatment_2"})

In [53]:
two_time_points = pd.DataFrame()
for i in mapping.keys():
    two_time_points = two_time_points.append(full_ind_data.loc[(full_ind_data.region == i) & ((full_ind_data.year == mapping[i]) | (full_ind_data.year == mapping[i] + 6))])

two_time_points.reset_index(inplace=True, drop=True)

two_time_points["treatment_period"] = [1 if i > 2018 else 0 for i in two_time_points.year]

In [54]:
treat_1_roads = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_roads"]].copy()
treat_1_roads.dropna(inplace=True)

roads = sm_f.ols(formula="bad_roads ~ treatment_period*treatment_1", data=treat_1_roads).fit(cov_type="cluster", cov_kwds={'groups': treat_1_roads['region']})
roads.summary()

0,1,2,3
Dep. Variable:,bad_roads,R-squared:,0.009
Model:,OLS,Adj. R-squared:,-0.008
Method:,Least Squares,F-statistic:,4.709
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.051
Time:,01:32:38,Log-Likelihood:,-851.85
No. Observations:,182,AIC:,1712.0
Df Residuals:,178,BIC:,1725.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,56.7686,6.015,9.437,0.000,44.979,68.558
treatment_period,-2.6936,3.559,-0.757,0.449,-9.669,4.282
treatment_1,8.7789,6.060,1.449,0.147,-3.099,20.657
treatment_period:treatment_1,-4.1664,3.455,-1.206,0.228,-10.938,2.606

0,1,2,3
Omnibus:,21.995,Durbin-Watson:,1.064
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8.653
Skew:,-0.287,Prob(JB):,0.0132
Kurtosis:,2.099,Cond. No.,9.06


In [55]:
treat_1_roads = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_roads"]].copy()
treat_1_roads.dropna(inplace=True)

roads = sm_f.ols(formula="bad_roads ~ treatment_period*treatment_1 + C(region)", data=treat_1_roads).fit(cov_type="HC3")
roads.summary()

0,1,2,3
Dep. Variable:,bad_roads,R-squared:,0.355
Model:,OLS,Adj. R-squared:,0.321
Method:,Least Squares,F-statistic:,9.985
Date:,"Thu, 11 May 2023",Prob (F-statistic):,2.84e-12
Time:,01:32:38,Log-Likelihood:,-812.76
No. Observations:,182,AIC:,1646.0
Df Residuals:,172,BIC:,1678.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,56.2249,4.143,13.571,0.000,48.105,64.345
C(region)[T.Мурманская область],-23.2662,9.693,-2.400,0.016,-42.264,-4.268
C(region)[T.Новгородская область],-0.8316,6.554,-0.127,0.899,-13.677,12.014
C(region)[T.Республика Алтай],22.2417,6.130,3.628,0.000,10.227,34.256
C(region)[T.Республика Марий Эл],20.2090,4.567,4.425,0.000,11.258,29.160
C(region)[T.Тульская область],-6.2688,4.966,-1.262,0.207,-16.002,3.464
C(region)[T.Хабаровский край],-23.0408,6.256,-3.683,0.000,-35.303,-10.779
treatment_period,-3.0796,3.858,-0.798,0.425,-10.641,4.482
treatment_1,9.9308,7.926,1.253,0.210,-5.604,25.465

0,1,2,3
Omnibus:,1.729,Durbin-Watson:,1.62
Prob(Omnibus):,0.421,Jarque-Bera (JB):,1.333
Skew:,-0.155,Prob(JB):,0.514
Kurtosis:,3.282,Cond. No.,9.71


In [56]:
treat_1_schools = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_schools"]].copy()
treat_1_schools.dropna(inplace=True)

schools = sm_f.ols(formula="bad_schools ~ treatment_period*treatment_1", data=treat_1_schools).fit(cov_type="cluster", cov_kwds={'groups': treat_1_schools['region']})
schools.summary()

  return np.sqrt(eigvals[0]/eigvals[-1])


0,1,2,3
Dep. Variable:,bad_schools,R-squared:,0.07
Model:,OLS,Adj. R-squared:,0.053
Method:,Least Squares,F-statistic:,17.22
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.00327
Time:,01:32:38,Log-Likelihood:,-434.46
No. Observations:,114,AIC:,874.9
Df Residuals:,111,BIC:,883.1
Df Model:,2,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3.3840,1.454,2.328,0.020,0.535,6.233
treatment_period,9.4698,1.677,5.648,0.000,6.184,12.756
treatment_1,0.1874,1.846,0.101,0.919,-3.431,3.806
treatment_period:treatment_1,0,0,,,0,0

0,1,2,3
Omnibus:,83.586,Durbin-Watson:,2.075
Prob(Omnibus):,0.0,Jarque-Bera (JB):,391.73
Skew:,2.714,Prob(JB):,8.65e-86
Kurtosis:,10.281,Cond. No.,inf


In [57]:
treat_1_schools = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_schools"]].copy()
treat_1_schools.dropna(inplace=True)

schools = sm_f.ols(formula="bad_schools ~ treatment_period*treatment_1 + C(region)", data=treat_1_schools).fit(cov_type="HC3")
schools.summary()

  return np.sqrt(eigvals[0]/eigvals[-1])


0,1,2,3
Dep. Variable:,bad_schools,R-squared:,0.257
Model:,OLS,Adj. R-squared:,0.2
Method:,Least Squares,F-statistic:,2.44
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.0184
Time:,01:32:38,Log-Likelihood:,-421.69
No. Observations:,114,AIC:,861.4
Df Residuals:,105,BIC:,886.0
Df Model:,8,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,28.8500,29.911,0.965,0.335,-29.774,87.474
C(region)[T.Новгородская область],-26.7182,29.932,-0.893,0.372,-85.383,31.947
C(region)[T.Приморский край],-28.8732,29.912,-0.965,0.334,-87.499,29.753
C(region)[T.Республика Алтай],-24.7512,30.108,-0.822,0.411,-83.761,34.258
C(region)[T.Республика Марий Эл],-26.0638,29.982,-0.869,0.385,-84.827,32.700
C(region)[T.Тульская область],-23.8931,30.017,-0.796,0.426,-82.724,34.938
C(region)[T.Хабаровский край],-7.1833,34.597,-0.208,0.836,-74.993,60.626
treatment_period,8.9570,5.709,1.569,0.117,-2.233,20.147
treatment_1,0.3475,3.693,0.094,0.925,-6.890,7.585

0,1,2,3
Omnibus:,60.052,Durbin-Watson:,2.309
Prob(Omnibus):,0.0,Jarque-Bera (JB):,186.044
Skew:,1.986,Prob(JB):,3.9899999999999997e-41
Kurtosis:,7.837,Cond. No.,inf


In [58]:
treat_1_sewerage = two_time_points[["raion", "region", "treatment_1", "treatment_period", "sewage_grid_fixed"]].copy()
treat_1_sewerage.dropna(inplace=True)

sewerage = sm_f.ols(formula="sewage_grid_fixed ~ treatment_period*treatment_1", data=treat_1_sewerage).fit(cov_type="cluster", cov_kwds={'groups': treat_1_sewerage['region']})
sewerage.summary()



0,1,2,3
Dep. Variable:,sewage_grid_fixed,R-squared:,0.134
Model:,OLS,Adj. R-squared:,0.09
Method:,Least Squares,F-statistic:,276.4
Date:,"Thu, 11 May 2023",Prob (F-statistic):,7.61e-06
Time:,01:32:39,Log-Likelihood:,116.01
No. Observations:,63,AIC:,-224.0
Df Residuals:,59,BIC:,-215.5
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0161,0.005,3.426,0.001,0.007,0.025
treatment_period,-0.0040,0.006,-0.662,0.508,-0.016,0.008
treatment_1,0.0839,0.005,17.808,0.000,0.075,0.093
treatment_period:treatment_1,-0.0930,0.006,-15.578,0.000,-0.105,-0.081

0,1,2,3
Omnibus:,84.041,Durbin-Watson:,2.534
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1271.862
Skew:,3.765,Prob(JB):,6.589999999999999e-277
Kurtosis:,23.684,Cond. No.,11.1


In [59]:
treat_1_sewerage = two_time_points[["raion", "region", "treatment_1", "treatment_period", "sewage_grid_fixed"]].copy()
treat_1_sewerage.dropna(inplace=True)

sewerage = sm_f.ols(formula="sewage_grid_fixed ~ treatment_period*treatment_1 + C(region)", data=treat_1_sewerage).fit(cov_type="HC3")
sewerage.summary()

0,1,2,3
Dep. Variable:,sewage_grid_fixed,R-squared:,0.174
Model:,OLS,Adj. R-squared:,0.051
Method:,Least Squares,F-statistic:,0.9709
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.468
Time:,01:32:39,Log-Likelihood:,117.5
No. Observations:,63,AIC:,-217.0
Df Residuals:,54,BIC:,-197.7
Df Model:,8,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0049,0.004,1.222,0.222,-0.003,0.013
C(region)[T.Мурманская область],-0.0007,0.005,-0.137,0.891,-0.010,0.009
C(region)[T.Новгородская область],0.0152,0.010,1.575,0.115,-0.004,0.034
C(region)[T.Приморский край],0.0257,0.026,0.981,0.327,-0.026,0.077
C(region)[T.Тульская область],-0.0044,0.024,-0.182,0.855,-0.052,0.043
C(region)[T.Хабаровский край],0.0064,0.004,1.475,0.140,-0.002,0.015
treatment_period,0.0024,0.005,0.445,0.656,-0.008,0.013
treatment_1,0.0694,0.144,0.483,0.629,-0.212,0.351
treatment_period:treatment_1,-0.0737,0.154,-0.480,0.631,-0.375,0.227

0,1,2,3
Omnibus:,75.729,Durbin-Watson:,2.575
Prob(Omnibus):,0.0,Jarque-Bera (JB):,924.732
Skew:,3.3,Prob(JB):,1.57e-201
Kurtosis:,20.57,Cond. No.,13.3


In [60]:
treat_1_heat = two_time_points[["raion", "region", "treatment_1", "treatment_period", "heat_grid_fixed"]].copy()
treat_1_heat.dropna(inplace=True)

heat = sm_f.ols(formula="heat_grid_fixed ~ treatment_period*treatment_1", data=treat_1_heat).fit(cov_type="HC3") # т.к. в выборке только один регион
heat.summary()

  return np.sqrt(eigvals[0]/eigvals[-1])


0,1,2,3
Dep. Variable:,heat_grid_fixed,R-squared:,0.013
Model:,OLS,Adj. R-squared:,-0.022
Method:,Least Squares,F-statistic:,0.06533
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.8
Time:,01:32:39,Log-Likelihood:,79.682
No. Observations:,30,AIC:,-155.4
Df Residuals:,28,BIC:,-152.6
Df Model:,1,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0240,0.003,7.359,0.000,0.018,0.030
treatment_period,0,0,,,0,0
treatment_1,0.0077,0.030,0.256,0.798,-0.052,0.067
treatment_period:treatment_1,0,0,,,0,0

0,1,2,3
Omnibus:,25.292,Durbin-Watson:,1.901
Prob(Omnibus):,0.0,Jarque-Bera (JB):,45.327
Skew:,1.912,Prob(JB):,1.44e-10
Kurtosis:,7.651,Cond. No.,inf


In [61]:
treat_1_water = two_time_points[["raion", "region", "treatment_1", "treatment_period", "water_grid_fixed"]].copy()
treat_1_water.dropna(inplace=True)

water = sm_f.ols(formula="water_grid_fixed ~ treatment_period*treatment_1", data=treat_1_water).fit(cov_type="cluster", cov_kwds={'groups': treat_1_water['region']})
water.summary()

0,1,2,3
Dep. Variable:,water_grid_fixed,R-squared:,0.005
Model:,OLS,Adj. R-squared:,-0.02
Method:,Least Squares,F-statistic:,17.62
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.00433
Time:,01:32:40,Log-Likelihood:,148.54
No. Observations:,123,AIC:,-289.1
Df Residuals:,119,BIC:,-277.8
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0267,0.011,2.418,0.016,0.005,0.048
treatment_period,0.0079,0.012,0.667,0.505,-0.015,0.031
treatment_1,-0.0029,0.018,-0.155,0.877,-0.039,0.033
treatment_period:treatment_1,-0.0265,0.018,-1.461,0.144,-0.062,0.009

0,1,2,3
Omnibus:,204.632,Durbin-Watson:,2.049
Prob(Omnibus):,0.0,Jarque-Bera (JB):,16883.66
Skew:,6.949,Prob(JB):,0.0
Kurtosis:,58.689,Cond. No.,10.2


In [62]:
treat_1_water = two_time_points[["raion", "region", "treatment_1", "treatment_period", "water_grid_fixed"]].copy()
treat_1_water.dropna(inplace=True)

water = sm_f.ols(formula="water_grid_fixed ~ treatment_period*treatment_1 + C(region)", data=treat_1_water).fit(cov_type="HC3")
water.summary()

0,1,2,3
Dep. Variable:,water_grid_fixed,R-squared:,0.09
Model:,OLS,Adj. R-squared:,0.026
Method:,Least Squares,F-statistic:,4.676
Date:,"Thu, 11 May 2023",Prob (F-statistic):,5.72e-05
Time:,01:32:40,Log-Likelihood:,154.04
No. Observations:,123,AIC:,-290.1
Df Residuals:,114,BIC:,-264.8
Df Model:,8,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0162,0.008,2.047,0.041,0.001,0.032
C(region)[T.Мурманская область],0.0703,0.064,1.097,0.273,-0.055,0.196
C(region)[T.Новгородская область],-0.0114,0.008,-1.433,0.152,-0.027,0.004
C(region)[T.Приморский край],0.0025,0.009,0.288,0.773,-0.015,0.020
C(region)[T.Республика Алтай],-0.0120,0.019,-0.631,0.528,-0.049,0.025
C(region)[T.Хабаровский край],0.0161,0.016,1.025,0.305,-0.015,0.047
treatment_period,0.0136,0.016,0.847,0.397,-0.018,0.045
treatment_1,0.0064,0.016,0.405,0.686,-0.025,0.037
treatment_period:treatment_1,-0.0309,0.023,-1.319,0.187,-0.077,0.015

0,1,2,3
Omnibus:,188.893,Durbin-Watson:,2.094
Prob(Omnibus):,0.0,Jarque-Bera (JB):,11740.691
Skew:,6.114,Prob(JB):,0.0
Kurtosis:,49.274,Cond. No.,10.9


# Fin t+1

In [62]:
with open("D:/DZ/Course_6/Diploma/Data/financial_data.csv", "rb") as inp:
    temp_fin = pickle.load(inp)

In [63]:
with open("D:/DZ/Course_6/Diploma/Data/mun_data_75.csv", "rb") as inp:
    data_for_models = pd.read_csv(inp, index_col=0).reset_index(drop=True)

In [64]:
mapping = {"Приморский край": 2016, "Мурманская область": 2016, "Костромская область": 2015, "Республика Алтай": 2014, "Республика Марий Эл": 2014, "Хабаровский край": 2014, 
           "Тульская область": 2014, "Новгородская область": 2016}

In [65]:
actual, initial, difference = temp_fin.loc[temp_fin.budget_type == "actual_spending"], temp_fin.loc[temp_fin.budget_type == "initial_budget"], temp_fin.loc[temp_fin.budget_type == "разница"]

In [66]:
actual = pd.pivot(actual, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()
initial = pd.pivot(initial, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()
difference = pd.pivot(difference, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()

In [67]:
for i in actual, initial, difference:
    i.rename(columns={"дорожное хозяйство (дорожные фонды)": "roads", "жилищно-коммунальное хозяйство": "housing", "образование": "education"}, inplace=True)

In [68]:
actual.head()

sub_indicator,region,raion,year,roads,housing,education
0,Костромская область,Антроповский район,2013,666.115702,1610.368144,12882.945154
1,Костромская область,Антроповский район,2014,603.232937,1922.79625,12638.11448
2,Костромская область,Антроповский район,2015,688.90871,95.255228,21901.154847
3,Костромская область,Антроповский район,2016,764.043811,203.425719,10986.12319
4,Костромская область,Антроповский район,2017,,,


In [69]:
frames = [actual, initial, difference]

In [70]:
for i in range(len(frames)):
    temp_fin_data = pd.DataFrame()
    for j in mapping.keys():
        temp_fin_data = temp_fin_data.append(frames[i].loc[(frames[i].region == j) & ((frames[i].year == mapping[j]) | (frames[i].year == mapping[j] + 6))])
    temp_fin_data["treatment_period"] = [1 if i > 2018 else 0 for i in temp_fin_data.year]
    frames[i] = pd.merge(temp_fin_data, data_for_models, on=["region", "raion"], how="inner")

In [71]:
#frames[0]

In [72]:
actual, initial, difference = frames

### Models for actual budgets

In [73]:
actual_roads_data = actual[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
actual_roads_data.dropna(inplace=True)
actual_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=actual_roads_data).fit(cov_type="cluster", cov_kwds={'groups': actual_roads_data['region']})
actual_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.049
Model:,OLS,Adj. R-squared:,0.035
Method:,Least Squares,F-statistic:,41.54
Date:,"Thu, 11 May 2023",Prob (F-statistic):,7.91e-05
Time:,01:33:04,Log-Likelihood:,-1759.4
No. Observations:,203,AIC:,3527.0
Df Residuals:,199,BIC:,3540.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1091.8190,243.504,4.484,0.000,614.560,1569.078
treatment_period,601.8934,352.416,1.708,0.088,-88.830,1292.616
treatment_1,-583.5200,251.170,-2.323,0.020,-1075.805,-91.235
treatment_period:treatment_1,-719.2807,342.327,-2.101,0.036,-1390.230,-48.332

0,1,2,3
Omnibus:,96.384,Durbin-Watson:,1.969
Prob(Omnibus):,0.0,Jarque-Bera (JB):,318.879
Skew:,2.032,Prob(JB):,5.7e-70
Kurtosis:,7.603,Cond. No.,12.2


In [74]:
actual_roads_data = actual[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
actual_roads_data.dropna(inplace=True)
actual_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=actual_roads_data).fit(cov_type="HC3")
actual_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.167
Model:,OLS,Adj. R-squared:,0.123
Method:,Least Squares,F-statistic:,5.504
Date:,"Thu, 11 May 2023",Prob (F-statistic):,3.48e-07
Time:,01:33:07,Log-Likelihood:,-1746.0
No. Observations:,203,AIC:,3514.0
Df Residuals:,192,BIC:,3550.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1331.6457,273.953,4.861,0.000,794.708,1868.583
C(region)[T.Мурманская область],1150.2159,920.163,1.250,0.211,-653.270,2953.702
C(region)[T.Новгородская область],-900.1165,295.137,-3.050,0.002,-1478.574,-321.659
C(region)[T.Приморский край],-695.6033,283.685,-2.452,0.014,-1251.615,-139.592
C(region)[T.Республика Алтай],532.9660,471.293,1.131,0.258,-390.751,1456.683
C(region)[T.Республика Марий Эл],-445.9538,298.581,-1.494,0.135,-1031.162,139.255
C(region)[T.Тульская область],-384.2702,320.061,-1.201,0.230,-1011.578,243.038
C(region)[T.Хабаровский край],232.7603,398.323,0.584,0.559,-547.938,1013.459
treatment_period,307.3738,291.598,1.054,0.292,-264.147,878.895

0,1,2,3
Omnibus:,82.167,Durbin-Watson:,2.142
Prob(Omnibus):,0.0,Jarque-Bera (JB):,252.948
Skew:,1.716,Prob(JB):,1.18e-55
Kurtosis:,7.257,Cond. No.,13.0


In [75]:
actual_housing_data = actual[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
actual_housing_data.dropna(inplace=True)
actual_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=actual_housing_data).fit(cov_type="cluster", cov_kwds={'groups': actual_housing_data['region']})
actual_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.039
Model:,OLS,Adj. R-squared:,0.024
Method:,Least Squares,F-statistic:,1.361
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.331
Time:,01:33:07,Log-Likelihood:,-2161.7
No. Observations:,202,AIC:,4331.0
Df Residuals:,198,BIC:,4345.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3329.4550,1853.455,1.796,0.072,-303.250,6962.160
treatment_period,4453.6087,2575.770,1.729,0.084,-594.807,9502.024
treatment_1,-2365.7899,1804.677,-1.311,0.190,-5902.892,1171.313
treatment_period:treatment_1,-3810.7328,2716.741,-1.403,0.161,-9135.447,1513.981

0,1,2,3
Omnibus:,271.613,Durbin-Watson:,1.079
Prob(Omnibus):,0.0,Jarque-Bera (JB):,16482.527
Skew:,5.894,Prob(JB):,0.0
Kurtosis:,45.654,Cond. No.,12.2


In [76]:
actual_housing_data = actual[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
actual_housing_data.dropna(inplace=True)
actual_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=actual_housing_data).fit(cov_type="HC3")
actual_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.256
Model:,OLS,Adj. R-squared:,0.217
Method:,Least Squares,F-statistic:,6.166
Date:,"Thu, 11 May 2023",Prob (F-statistic):,3.87e-08
Time:,01:33:07,Log-Likelihood:,-2135.9
No. Observations:,202,AIC:,4294.0
Df Residuals:,191,BIC:,4330.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1841.7015,995.856,1.849,0.064,-110.140,3793.543
C(region)[T.Мурманская область],1306.1805,1266.998,1.031,0.303,-1177.091,3789.452
C(region)[T.Новгородская область],-1304.0566,1054.264,-1.237,0.216,-3370.377,762.263
C(region)[T.Приморский край],-1051.9534,973.711,-1.080,0.280,-2960.392,856.485
C(region)[T.Республика Алтай],-16.8795,936.889,-0.018,0.986,-1853.148,1819.389
C(region)[T.Республика Марий Эл],-866.5062,728.982,-1.189,0.235,-2295.284,562.272
C(region)[T.Тульская область],1626.4730,1255.209,1.296,0.195,-833.692,4086.638
C(region)[T.Хабаровский край],1.293e+04,3496.984,3.698,0.000,6078.085,1.98e+04
treatment_period,1947.2395,2259.632,0.862,0.389,-2481.557,6376.036

0,1,2,3
Omnibus:,266.313,Durbin-Watson:,1.366
Prob(Omnibus):,0.0,Jarque-Bera (JB):,18263.716
Skew:,5.608,Prob(JB):,0.0
Kurtosis:,48.212,Cond. No.,13.0


In [77]:
actual_education_data = actual[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
actual_education_data.dropna(inplace=True)
actual_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=actual_education_data).fit(cov_type="cluster", cov_kwds={'groups': actual_education_data['region']})
actual_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.036
Model:,OLS,Adj. R-squared:,0.021
Method:,Least Squares,F-statistic:,4.317
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.0508
Time:,01:33:08,Log-Likelihood:,-2162.8
No. Observations:,204,AIC:,4334.0
Df Residuals:,200,BIC:,4347.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.471e+04,2538.753,5.795,0.000,9736.806,1.97e+04
treatment_period,4041.6780,3349.855,1.207,0.228,-2523.917,1.06e+04
treatment_1,338.3306,2742.259,0.123,0.902,-5036.399,5713.060
treatment_period:treatment_1,-8455.7480,3067.430,-2.757,0.006,-1.45e+04,-2443.696

0,1,2,3
Omnibus:,135.554,Durbin-Watson:,0.803
Prob(Omnibus):,0.0,Jarque-Bera (JB):,960.909
Skew:,2.594,Prob(JB):,2.1900000000000002e-209
Kurtosis:,12.281,Cond. No.,12.2


In [78]:
actual_education_data = actual[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
actual_education_data.dropna(inplace=True)
actual_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=actual_education_data).fit(cov_type="HC3")
actual_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.036
Model:,OLS,Adj. R-squared:,0.021
Method:,Least Squares,F-statistic:,8.105
Date:,"Thu, 11 May 2023",Prob (F-statistic):,4.03e-05
Time:,01:33:08,Log-Likelihood:,-2162.8
No. Observations:,204,AIC:,4334.0
Df Residuals:,200,BIC:,4347.0
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.471e+04,789.442,18.637,0.000,1.32e+04,1.63e+04
treatment_period,4041.6780,1761.647,2.294,0.022,588.913,7494.443
treatment_1,338.3306,1859.306,0.182,0.856,-3305.841,3982.503
treatment_period:treatment_1,-8455.7480,2595.317,-3.258,0.001,-1.35e+04,-3369.021

0,1,2,3
Omnibus:,135.554,Durbin-Watson:,0.803
Prob(Omnibus):,0.0,Jarque-Bera (JB):,960.909
Skew:,2.594,Prob(JB):,2.1900000000000002e-209
Kurtosis:,12.281,Cond. No.,12.2


### Models for initial budgets

In [79]:
initial_roads_data = initial[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
initial_roads_data.dropna(inplace=True)
initial_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=initial_roads_data).fit(cov_type="cluster", cov_kwds={'groups': initial_roads_data['region']})
initial_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.139
Model:,OLS,Adj. R-squared:,0.123
Method:,Least Squares,F-statistic:,6.675
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.0244
Time:,01:33:09,Log-Likelihood:,-1362.0
No. Observations:,166,AIC:,2732.0
Df Residuals:,162,BIC:,2745.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,487.1776,109.649,4.443,0.000,272.269,702.086
treatment_period,691.6911,184.348,3.752,0.000,330.376,1053.007
treatment_1,-6.0083,261.855,-0.023,0.982,-519.235,507.218
treatment_period:treatment_1,281.2167,399.689,0.704,0.482,-502.160,1064.593

0,1,2,3
Omnibus:,104.571,Durbin-Watson:,1.722
Prob(Omnibus):,0.0,Jarque-Bera (JB):,597.341
Skew:,2.389,Prob(JB):,1.95e-130
Kurtosis:,10.97,Cond. No.,9.73


In [80]:
initial_roads_data = initial[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
initial_roads_data.dropna(inplace=True)
initial_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=initial_roads_data).fit(cov_type="HC3")
initial_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.212
Model:,OLS,Adj. R-squared:,0.167
Method:,Least Squares,F-statistic:,5.286
Date:,"Thu, 11 May 2023",Prob (F-statistic):,2.74e-06
Time:,01:33:09,Log-Likelihood:,-1354.6
No. Observations:,166,AIC:,2729.0
Df Residuals:,156,BIC:,2760.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,398.2102,127.185,3.131,0.002,148.931,647.489
C(region)[T.Мурманская область],901.7190,574.850,1.569,0.117,-224.965,2028.403
C(region)[T.Новгородская область],-163.3004,139.369,-1.172,0.241,-436.459,109.859
C(region)[T.Приморский край],98.2351,163.709,0.600,0.548,-222.628,419.099
C(region)[T.Республика Алтай],-631.1970,241.527,-2.613,0.009,-1104.581,-157.813
C(region)[T.Тульская область],106.5967,318.193,0.335,0.738,-517.051,730.244
C(region)[T.Хабаровский край],89.7566,230.713,0.389,0.697,-362.433,541.946
treatment_period,849.0491,204.025,4.161,0.000,449.167,1248.932
treatment_1,14.6818,393.378,0.037,0.970,-756.325,785.689

0,1,2,3
Omnibus:,92.861,Durbin-Watson:,1.894
Prob(Omnibus):,0.0,Jarque-Bera (JB):,467.085
Skew:,2.107,Prob(JB):,3.7499999999999996e-102
Kurtosis:,10.055,Cond. No.,10.4


In [81]:
initial_housing_data = initial[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
initial_housing_data.dropna(inplace=True)
initial_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=initial_housing_data).fit(cov_type="cluster", cov_kwds={'groups': initial_housing_data['region']})
initial_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.024
Model:,OLS,Adj. R-squared:,0.007
Method:,Least Squares,F-statistic:,1.405
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.33
Time:,01:33:09,Log-Likelihood:,-1872.0
No. Observations:,174,AIC:,3752.0
Df Residuals:,170,BIC:,3765.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,2837.8508,1799.595,1.577,0.115,-689.291,6364.993
treatment_period,3273.6532,3044.499,1.075,0.282,-2693.456,9240.762
treatment_1,-2744.2481,1801.289,-1.523,0.128,-6274.710,786.214
treatment_period:treatment_1,-3021.6808,2983.970,-1.013,0.311,-8870.154,2826.792

0,1,2,3
Omnibus:,208.822,Durbin-Watson:,1.167
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5956.293
Skew:,4.956,Prob(JB):,0.0
Kurtosis:,29.894,Cond. No.,10.5


In [82]:
initial_housing_data = initial[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
initial_housing_data.dropna(inplace=True)
initial_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=initial_housing_data).fit(cov_type="HC3")
initial_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.198
Model:,OLS,Adj. R-squared:,0.154
Method:,Least Squares,F-statistic:,9.228
Date:,"Thu, 11 May 2023",Prob (F-statistic):,2.9e-11
Time:,01:33:09,Log-Likelihood:,-1854.9
No. Observations:,174,AIC:,3730.0
Df Residuals:,164,BIC:,3761.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1376.1070,2638.163,0.522,0.602,-3794.597,6546.811
C(region)[T.Мурманская область],349.4807,2675.577,0.131,0.896,-4894.553,5593.515
C(region)[T.Новгородская область],-1314.4999,2638.196,-0.498,0.618,-6485.270,3856.270
C(region)[T.Приморский край],-821.3838,2559.885,-0.321,0.748,-5838.667,4195.899
C(region)[T.Республика Алтай],-930.5900,1456.118,-0.639,0.523,-3784.528,1923.348
C(region)[T.Тульская область],-483.8677,1443.283,-0.335,0.737,-3312.650,2344.915
C(region)[T.Хабаровский край],1.141e+04,3979.317,2.867,0.004,3608.044,1.92e+04
treatment_period,1429.4180,2813.704,0.508,0.611,-4085.341,6944.177
treatment_1,-760.4036,1182.937,-0.643,0.520,-3078.918,1558.110

0,1,2,3
Omnibus:,193.968,Durbin-Watson:,1.413
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5017.474
Skew:,4.416,Prob(JB):,0.0
Kurtosis:,27.78,Cond. No.,11.1


In [83]:
initial_education_data = initial[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
initial_education_data.dropna(inplace=True)
initial_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=initial_education_data).fit(cov_type="cluster", cov_kwds={'groups': initial_education_data['region']})
initial_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.022
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,0.8274
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.525
Time:,01:33:10,Log-Likelihood:,-1911.9
No. Observations:,181,AIC:,3832.0
Df Residuals:,177,BIC:,3845.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.279e+04,2671.682,4.788,0.000,7554.894,1.8e+04
treatment_period,2310.7825,2638.689,0.876,0.381,-2860.953,7482.518
treatment_1,-2300.9705,4051.331,-0.568,0.570,-1.02e+04,5639.492
treatment_period:treatment_1,-3265.5956,4374.743,-0.746,0.455,-1.18e+04,5308.743

0,1,2,3
Omnibus:,116.802,Durbin-Watson:,0.816
Prob(Omnibus):,0.0,Jarque-Bera (JB):,748.103
Skew:,2.466,Prob(JB):,3.56e-163
Kurtosis:,11.653,Cond. No.,9.99


In [84]:
initial_education_data = initial[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
initial_education_data.dropna(inplace=True)
initial_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=initial_education_data).fit(cov_type="HC3")
initial_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.396
Model:,OLS,Adj. R-squared:,0.364
Method:,Least Squares,F-statistic:,23.22
Date:,"Thu, 11 May 2023",Prob (F-statistic):,1.56e-25
Time:,01:33:10,Log-Likelihood:,-1868.3
No. Observations:,181,AIC:,3757.0
Df Residuals:,171,BIC:,3789.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,8095.2391,1121.087,7.221,0.000,5897.950,1.03e+04
C(region)[T.Мурманская область],1.159e+04,1420.773,8.155,0.000,8801.927,1.44e+04
C(region)[T.Новгородская область],-984.0769,1307.873,-0.752,0.452,-3547.461,1579.307
C(region)[T.Приморский край],2369.6909,1282.552,1.848,0.065,-144.065,4883.446
C(region)[T.Республика Алтай],9109.1960,1507.136,6.044,0.000,6155.264,1.21e+04
C(region)[T.Тульская область],1733.4705,1001.645,1.731,0.084,-229.718,3696.660
C(region)[T.Хабаровский край],1.442e+04,2546.729,5.664,0.000,9433.478,1.94e+04
treatment_period,586.9944,1817.790,0.323,0.747,-2975.809,4149.798
treatment_1,1027.3634,3255.785,0.316,0.752,-5353.859,7408.585

0,1,2,3
Omnibus:,98.593,Durbin-Watson:,1.22
Prob(Omnibus):,0.0,Jarque-Bera (JB):,801.186
Skew:,1.861,Prob(JB):,1.0599999999999999e-174
Kurtosis:,12.611,Cond. No.,10.6


### Models for difference

In [85]:
difference_roads_data = difference[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
difference_roads_data.dropna(inplace=True)
difference_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=difference_roads_data).fit(cov_type="cluster", cov_kwds={'groups': difference_roads_data['region']})
difference_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.012
Model:,OLS,Adj. R-squared:,-0.009
Method:,Least Squares,F-statistic:,30.11
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.000516
Time:,01:33:11,Log-Likelihood:,-1175.6
No. Observations:,140,AIC:,2359.0
Df Residuals:,136,BIC:,2371.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,500.2389,180.586,2.770,0.006,146.297,854.180
treatment_period,169.8228,170.007,0.999,0.318,-163.385,503.031
treatment_1,-350.7175,443.132,-0.791,0.429,-1219.240,517.805
treatment_period:treatment_1,-297.8076,445.106,-0.669,0.503,-1170.199,574.584

0,1,2,3
Omnibus:,94.86,Durbin-Watson:,2.048
Prob(Omnibus):,0.0,Jarque-Bera (JB):,511.442
Skew:,2.533,Prob(JB):,8.75e-112
Kurtosis:,10.875,Cond. No.,13.9


In [86]:
difference_roads_data = difference[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
difference_roads_data.dropna(inplace=True)
difference_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=difference_roads_data).fit(cov_type="HC3")
difference_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.104
Model:,OLS,Adj. R-squared:,0.042
Method:,Least Squares,F-statistic:,2.253
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.0224
Time:,01:33:11,Log-Likelihood:,-1168.8
No. Observations:,140,AIC:,2358.0
Df Residuals:,130,BIC:,2387.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,907.9419,323.846,2.804,0.005,273.216,1542.668
C(region)[T.Мурманская область],90.4847,813.767,0.111,0.911,-1504.470,1685.439
C(region)[T.Новгородская область],-779.7509,337.634,-2.309,0.021,-1441.501,-118.001
C(region)[T.Приморский край],-780.8502,330.247,-2.364,0.018,-1428.123,-133.577
C(region)[T.Республика Алтай],-281.5959,289.741,-0.972,0.331,-849.478,286.286
C(region)[T.Тульская область],-340.1934,451.978,-0.753,0.452,-1226.054,545.667
C(region)[T.Хабаровский край],-24.7009,320.851,-0.077,0.939,-653.556,604.155
treatment_period,-157.7516,315.025,-0.501,0.617,-775.189,459.686
treatment_1,-384.7393,503.520,-0.764,0.445,-1371.620,602.142

0,1,2,3
Omnibus:,82.205,Durbin-Watson:,2.19
Prob(Omnibus):,0.0,Jarque-Bera (JB):,361.762
Skew:,2.204,Prob(JB):,2.78e-79
Kurtosis:,9.526,Cond. No.,15.1


In [87]:
difference_housing_data = difference[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
difference_housing_data.dropna(inplace=True)
difference_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=difference_housing_data).fit(cov_type="cluster", cov_kwds={'groups': difference_housing_data['region']})
difference_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.003
Model:,OLS,Adj. R-squared:,-0.018
Method:,Least Squares,F-statistic:,14.95
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.00343
Time:,01:33:11,Log-Likelihood:,-1461.4
No. Observations:,146,AIC:,2931.0
Df Residuals:,142,BIC:,2943.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,786.0978,630.483,1.247,0.212,-449.627,2021.822
treatment_period,612.3883,624.088,0.981,0.326,-610.802,1835.578
treatment_1,-71.9388,684.518,-0.105,0.916,-1413.569,1269.691
treatment_period:treatment_1,-499.7715,673.828,-0.742,0.458,-1820.449,820.906

0,1,2,3
Omnibus:,175.972,Durbin-Watson:,1.777
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10802.281
Skew:,-4.228,Prob(JB):,0.0
Kurtosis:,44.282,Cond. No.,14.4


In [88]:
difference_housing_data = difference[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
difference_housing_data.dropna(inplace=True)
difference_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=difference_housing_data).fit(cov_type="HC3")
difference_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.033
Model:,OLS,Adj. R-squared:,-0.031
Method:,Least Squares,F-statistic:,1.496
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.155
Time:,01:33:11,Log-Likelihood:,-1459.2
No. Observations:,146,AIC:,2938.0
Df Residuals:,136,BIC:,2968.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-568.8439,2850.680,-0.200,0.842,-6156.074,5018.386
C(region)[T.Мурманская область],1484.3978,2927.207,0.507,0.612,-4252.822,7221.618
C(region)[T.Новгородская область],1044.8817,2871.931,0.364,0.716,-4583.999,6673.762
C(region)[T.Приморский край],788.3511,2807.560,0.281,0.779,-4714.365,6291.068
C(region)[T.Республика Алтай],2176.0011,2060.686,1.056,0.291,-1862.869,6214.872
C(region)[T.Тульская область],2453.1448,2928.022,0.838,0.402,-3285.673,8191.963
C(region)[T.Хабаровский край],2568.2114,2401.405,1.069,0.285,-2138.455,7274.878
treatment_period,235.6149,1746.368,0.135,0.893,-3187.203,3658.433
treatment_1,-13.5955,984.595,-0.014,0.989,-1943.367,1916.176

0,1,2,3
Omnibus:,175.06,Durbin-Watson:,1.83
Prob(Omnibus):,0.0,Jarque-Bera (JB):,9761.346
Skew:,-4.239,Prob(JB):,0.0
Kurtosis:,42.15,Cond. No.,15.6


In [89]:
difference_education_data = difference[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
difference_education_data.dropna(inplace=True)
difference_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=difference_education_data).fit(cov_type="cluster", cov_kwds={'groups': difference_education_data['region']})
difference_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.036
Model:,OLS,Adj. R-squared:,0.016
Method:,Least Squares,F-statistic:,17.34
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.00232
Time:,01:33:12,Log-Likelihood:,-1486.7
No. Observations:,151,AIC:,2981.0
Df Residuals:,147,BIC:,2993.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,2711.2271,1341.280,2.021,0.043,82.367,5340.087
treatment_period,1890.2510,1637.213,1.155,0.248,-1318.628,5099.130
treatment_1,1959.3465,1928.096,1.016,0.310,-1819.652,5738.344
treatment_period:treatment_1,-4140.6313,1879.748,-2.203,0.028,-7824.870,-456.392

0,1,2,3
Omnibus:,81.846,Durbin-Watson:,1.45
Prob(Omnibus):,0.0,Jarque-Bera (JB):,316.724
Skew:,2.112,Prob(JB):,1.6799999999999998e-69
Kurtosis:,8.701,Cond. No.,14.3


In [90]:
difference_education_data = difference[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
difference_education_data.dropna(inplace=True)
difference_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=difference_education_data).fit(cov_type="HC3")
difference_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.27
Model:,OLS,Adj. R-squared:,0.223
Method:,Least Squares,F-statistic:,13.43
Date:,"Thu, 11 May 2023",Prob (F-statistic):,2.21e-15
Time:,01:33:12,Log-Likelihood:,-1465.7
No. Observations:,151,AIC:,2951.0
Df Residuals:,141,BIC:,2982.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,7186.5044,1062.770,6.762,0.000,5103.513,9269.496
C(region)[T.Мурманская область],-6544.9312,1078.902,-6.066,0.000,-8659.541,-4430.321
C(region)[T.Новгородская область],-5647.0583,1251.684,-4.512,0.000,-8100.315,-3193.802
C(region)[T.Приморский край],-7127.2488,1065.652,-6.688,0.000,-9215.889,-5038.609
C(region)[T.Республика Алтай],-1978.3210,1135.812,-1.742,0.082,-4204.471,247.829
C(region)[T.Тульская область],-5148.5080,1367.277,-3.766,0.000,-7828.321,-2468.695
C(region)[T.Хабаровский край],-2918.7388,1354.048,-2.156,0.031,-5572.623,-264.854
treatment_period,-772.8597,1155.885,-0.669,0.504,-3038.352,1492.633
treatment_1,1575.9881,2111.898,0.746,0.456,-2563.257,5715.233

0,1,2,3
Omnibus:,100.226,Durbin-Watson:,1.756
Prob(Omnibus):,0.0,Jarque-Bera (JB):,832.388
Skew:,2.29,Prob(JB):,1.77e-181
Kurtosis:,13.551,Cond. No.,15.3


# Goods t-1 

In [91]:
with open("D:/DZ/Course_6/Diploma/Data/full_ind_data.csv", "rb") as inp:
    full_ind_data = pickle.load(inp)

In [92]:
with open("D:/DZ/Course_6/Diploma/Data/mun_data_75.csv", "rb") as inp:
    mun_data = pd.read_csv(inp, index_col=0)

In [93]:
mun_data.reset_index(inplace=True, drop=True)

In [94]:
full_ind_data.shape

(1386, 14)

In [95]:
full_ind_data.head()

Unnamed: 0,raion,region,treatment_1,treatment_2,year,bad_roads,bad_schools,unfinished_construction,housing,population,lack_transportation,heat_grid_fixed,water_grid_fixed,sewage_grid_fixed
0,Владивостокский,Приморский край,1,1,2013,,,3400.0,4.05,625868.0,,0.05024,0.00447,0.001211
1,Владивостокский,Приморский край,1,1,2014,,,3400.0,3.88,630027.0,,0.02067,0.008552,0.004417
2,Владивостокский,Приморский край,1,1,2015,,0.0,21529.93,5.0,631387.0,,0.019753,0.009448,0.003201
3,Владивостокский,Приморский край,1,1,2016,,0.0,2411.41,5.17,633167.0,,0.011566,0.006601,0.001074
4,Владивостокский,Приморский край,1,1,2017,,0.0,,3.19,633414.0,,0.013158,0.003128,0.003201


In [96]:
full_in_data = pd.merge(full_ind_data, mun_data, on=["raion", "region"], how="left")

In [97]:
full_ind_data = full_in_data.drop(columns=["treatment_1_x", "treatment_2_x"]).rename(columns={"treatment_1_y": "treatment_1", "treatment_2_y": "treatment_2"})

In [98]:
full_ind_data.shape

(1386, 14)

In [99]:
full_ind_data.head()

Unnamed: 0,raion,region,year,bad_roads,bad_schools,unfinished_construction,housing,population,lack_transportation,heat_grid_fixed,water_grid_fixed,sewage_grid_fixed,treatment_1,treatment_2
0,Владивостокский,Приморский край,2013,,,3400.0,4.05,625868.0,,0.05024,0.00447,0.001211,0,1
1,Владивостокский,Приморский край,2014,,,3400.0,3.88,630027.0,,0.02067,0.008552,0.004417,0,1
2,Владивостокский,Приморский край,2015,,0.0,21529.93,5.0,631387.0,,0.019753,0.009448,0.003201,0,1
3,Владивостокский,Приморский край,2016,,0.0,2411.41,5.17,633167.0,,0.011566,0.006601,0.001074,0,1
4,Владивостокский,Приморский край,2017,,0.0,,3.19,633414.0,,0.013158,0.003128,0.003201,0,1


In [100]:
two_time_points = pd.DataFrame()
for i in mapping.keys():
    two_time_points = two_time_points.append(full_ind_data.loc[(full_ind_data.region == i) & ((full_ind_data.year == mapping[i]) | (full_ind_data.year == mapping[i] + 4))])

two_time_points.reset_index(inplace=True, drop=True)

two_time_points["treatment_period"] = [1 if i > 2018 else 0 for i in two_time_points.year]

In [101]:
treat_1_roads = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_roads"]].copy()
treat_1_roads.dropna(inplace=True)

roads = sm_f.ols(formula="bad_roads ~ treatment_period*treatment_1", data=treat_1_roads).fit(cov_type="cluster", cov_kwds={'groups': treat_1_roads['region']})
roads.summary()

0,1,2,3
Dep. Variable:,bad_roads,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,1.048
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.437
Time:,01:33:19,Log-Likelihood:,-992.75
No. Observations:,212,AIC:,1993.0
Df Residuals:,208,BIC:,2007.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,55.9499,7.722,7.245,0.000,40.814,71.086
treatment_period,-5.2727,8.014,-0.658,0.511,-20.981,10.435
treatment_1,8.9762,7.654,1.173,0.241,-6.025,23.978
treatment_period:treatment_1,-6.5868,7.289,-0.904,0.366,-20.872,7.698

0,1,2,3
Omnibus:,30.125,Durbin-Watson:,0.95
Prob(Omnibus):,0.0,Jarque-Bera (JB):,9.536
Skew:,-0.216,Prob(JB):,0.0085
Kurtosis:,2.055,Cond. No.,10.3


In [102]:
treat_1_roads = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_roads"]].copy()
treat_1_roads.dropna(inplace=True)

roads = sm_f.ols(formula="bad_roads ~ treatment_period*treatment_1 + C(region)", data=treat_1_roads).fit(cov_type="HC3")
roads.summary()

0,1,2,3
Dep. Variable:,bad_roads,R-squared:,0.352
Model:,OLS,Adj. R-squared:,0.323
Method:,Least Squares,F-statistic:,11.45
Date:,"Thu, 11 May 2023",Prob (F-statistic):,1.9e-14
Time:,01:33:19,Log-Likelihood:,-948.52
No. Observations:,212,AIC:,1917.0
Df Residuals:,202,BIC:,1951.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,56.2570,4.361,12.901,0.000,47.710,64.804
C(region)[T.Мурманская область],-19.0019,6.444,-2.949,0.003,-31.632,-6.372
C(region)[T.Новгородская область],-3.1610,5.077,-0.623,0.534,-13.112,6.790
C(region)[T.Республика Алтай],23.4748,6.707,3.500,0.000,10.329,36.621
C(region)[T.Республика Марий Эл],17.2560,5.387,3.203,0.001,6.698,27.815
C(region)[T.Тульская область],-7.6720,5.728,-1.339,0.180,-18.898,3.554
C(region)[T.Хабаровский край],-29.1312,6.733,-4.327,0.000,-42.327,-15.935
treatment_period,-0.1798,4.594,-0.039,0.969,-9.184,8.824
treatment_1,10.7356,5.813,1.847,0.065,-0.657,22.128

0,1,2,3
Omnibus:,2.367,Durbin-Watson:,1.442
Prob(Omnibus):,0.306,Jarque-Bera (JB):,2.117
Skew:,-0.125,Prob(JB):,0.347
Kurtosis:,3.421,Cond. No.,11.1


In [103]:
treat_1_schools = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_schools"]].copy()
treat_1_schools.dropna(inplace=True)

schools = sm_f.ols(formula="bad_schools ~ treatment_period*treatment_1", data=treat_1_schools).fit(cov_type="cluster", cov_kwds={'groups': treat_1_schools['region']})
schools.summary()

0,1,2,3
Dep. Variable:,bad_schools,R-squared:,0.138
Model:,OLS,Adj. R-squared:,0.119
Method:,Least Squares,F-statistic:,44.59
Date:,"Thu, 11 May 2023",Prob (F-statistic):,6.26e-05
Time:,01:33:20,Log-Likelihood:,-507.23
No. Observations:,139,AIC:,1022.0
Df Residuals:,135,BIC:,1034.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3.1926,1.212,2.633,0.008,0.816,5.569
treatment_period,14.4741,3.986,3.631,0.000,6.661,22.287
treatment_1,-0.0676,1.769,-0.038,0.970,-3.535,3.400
treatment_period:treatment_1,-0.5991,4.508,-0.133,0.894,-9.435,8.237

0,1,2,3
Omnibus:,115.056,Durbin-Watson:,2.0
Prob(Omnibus):,0.0,Jarque-Bera (JB):,856.372
Skew:,3.124,Prob(JB):,1.1e-186
Kurtosis:,13.432,Cond. No.,13.4


In [104]:
treat_1_schools = two_time_points[["raion", "region", "treatment_1", "treatment_period", "bad_schools"]].copy()
treat_1_schools.dropna(inplace=True)

schools = sm_f.ols(formula="bad_schools ~ treatment_period*treatment_1 + C(region)", data=treat_1_schools).fit(cov_type="HC3")
schools.summary()

0,1,2,3
Dep. Variable:,bad_schools,R-squared:,0.366
Model:,OLS,Adj. R-squared:,0.316
Method:,Least Squares,F-statistic:,3.832
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.00015
Time:,01:33:20,Log-Likelihood:,-485.91
No. Observations:,139,AIC:,993.8
Df Residuals:,128,BIC:,1026.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,28.8500,29.911,0.965,0.335,-29.774,87.474
C(region)[T.Мурманская область],-42.6384,30.697,-1.389,0.165,-102.804,17.527
C(region)[T.Новгородская область],-26.0905,29.936,-0.872,0.383,-84.763,32.582
C(region)[T.Приморский край],-29.3364,29.915,-0.981,0.327,-87.969,29.297
C(region)[T.Республика Алтай],-23.9318,30.016,-0.797,0.425,-82.762,34.899
C(region)[T.Республика Марий Эл],-27.3201,29.930,-0.913,0.361,-85.983,31.343
C(region)[T.Тульская область],-23.9040,30.015,-0.796,0.426,-82.732,34.924
C(region)[T.Хабаровский край],-7.1833,34.597,-0.208,0.836,-74.993,60.626
treatment_period,18.1884,4.124,4.410,0.000,10.105,26.272

0,1,2,3
Omnibus:,74.592,Durbin-Watson:,2.19
Prob(Omnibus):,0.0,Jarque-Bera (JB):,282.889
Skew:,2.043,Prob(JB):,3.73e-62
Kurtosis:,8.67,Cond. No.,26.8


In [105]:
treat_1_sewerage = two_time_points[["raion", "region", "treatment_1", "treatment_period", "sewage_grid_fixed"]].copy()
treat_1_sewerage.dropna(inplace=True)

sewerage = sm_f.ols(formula="sewage_grid_fixed ~ treatment_period*treatment_1", data=treat_1_sewerage).fit(cov_type="cluster", cov_kwds={'groups': treat_1_sewerage['region']})
sewerage.summary()

0,1,2,3
Dep. Variable:,sewage_grid_fixed,R-squared:,0.016
Model:,OLS,Adj. R-squared:,-0.013
Method:,Least Squares,F-statistic:,3.449
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.108
Time:,01:33:20,Log-Likelihood:,121.6
No. Observations:,107,AIC:,-235.2
Df Residuals:,103,BIC:,-224.5
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0135,0.004,3.425,0.001,0.006,0.021
treatment_period,0.0176,0.016,1.069,0.285,-0.015,0.050
treatment_1,0.0294,0.027,1.093,0.274,-0.023,0.082
treatment_period:treatment_1,-0.0584,0.040,-1.452,0.146,-0.137,0.020

0,1,2,3
Omnibus:,197.823,Durbin-Watson:,1.565
Prob(Omnibus):,0.0,Jarque-Bera (JB):,20845.947
Skew:,7.752,Prob(JB):,0.0
Kurtosis:,69.598,Cond. No.,12.4


In [106]:
treat_1_sewerage = two_time_points[["raion", "region", "treatment_1", "treatment_period", "sewage_grid_fixed"]].copy()
treat_1_sewerage.dropna(inplace=True)

sewerage = sm_f.ols(formula="sewage_grid_fixed ~ treatment_period*treatment_1 + C(region)", data=treat_1_sewerage).fit(cov_type="HC3")
sewerage.summary()

0,1,2,3
Dep. Variable:,sewage_grid_fixed,R-squared:,0.067
Model:,OLS,Adj. R-squared:,-0.009
Method:,Least Squares,F-statistic:,1.284
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.261
Time:,01:33:21,Log-Likelihood:,124.43
No. Observations:,107,AIC:,-230.9
Df Residuals:,98,BIC:,-206.8
Df Model:,8,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0079,0.005,1.590,0.112,-0.002,0.018
C(region)[T.Мурманская область],-0.0064,0.014,-0.447,0.655,-0.035,0.022
C(region)[T.Новгородская область],0.0031,0.013,0.231,0.817,-0.023,0.029
C(region)[T.Приморский край],0.0397,0.023,1.730,0.084,-0.005,0.085
C(region)[T.Тульская область],-0.0075,0.012,-0.625,0.532,-0.031,0.016
C(region)[T.Хабаровский край],0.0020,0.005,0.378,0.705,-0.008,0.012
treatment_period,0.0053,0.022,0.237,0.813,-0.038,0.049
treatment_1,0.0269,0.038,0.706,0.480,-0.048,0.101
treatment_period:treatment_1,-0.0775,0.049,-1.580,0.114,-0.174,0.019

0,1,2,3
Omnibus:,194.402,Durbin-Watson:,1.672
Prob(Omnibus):,0.0,Jarque-Bera (JB):,19884.481
Skew:,7.499,Prob(JB):,0.0
Kurtosis:,68.078,Cond. No.,15.2


In [107]:
treat_1_heat = two_time_points[["raion", "region", "treatment_1", "treatment_period", "heat_grid_fixed"]].copy()
treat_1_heat.dropna(inplace=True)

heat = sm_f.ols(formula="heat_grid_fixed ~ treatment_period*treatment_1", data=treat_1_heat).fit(cov_type="HC3") # т.к. в выборке только один регион
heat.summary()

0,1,2,3
Dep. Variable:,heat_grid_fixed,R-squared:,0.015
Model:,OLS,Adj. R-squared:,-0.04
Method:,Least Squares,F-statistic:,0.301
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.825
Time:,01:33:21,Log-Likelihood:,120.76
No. Observations:,58,AIC:,-233.5
Df Residuals:,54,BIC:,-225.3
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0240,0.003,7.359,0.000,0.018,0.030
treatment_period,0.0069,0.009,0.768,0.443,-0.011,0.025
treatment_1,0.0077,0.030,0.256,0.798,-0.052,0.067
treatment_period:treatment_1,-0.0195,0.033,-0.585,0.558,-0.085,0.046

0,1,2,3
Omnibus:,99.906,Durbin-Watson:,2.097
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2458.052
Skew:,5.098,Prob(JB):,0.0
Kurtosis:,33.219,Cond. No.,10.3


In [108]:
treat_1_water = two_time_points[["raion", "region", "treatment_1", "treatment_period", "water_grid_fixed"]].copy()
treat_1_water.dropna(inplace=True)

water = sm_f.ols(formula="water_grid_fixed ~ treatment_period*treatment_1", data=treat_1_water).fit(cov_type="cluster", cov_kwds={'groups': treat_1_water['region']})
water.summary()

0,1,2,3
Dep. Variable:,water_grid_fixed,R-squared:,0.003
Model:,OLS,Adj. R-squared:,-0.016
Method:,Least Squares,F-statistic:,3.277
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.117
Time:,01:33:22,Log-Likelihood:,226.61
No. Observations:,158,AIC:,-445.2
Df Residuals:,154,BIC:,-433.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0260,0.009,2.854,0.004,0.008,0.044
treatment_period,-0.0072,0.006,-1.140,0.254,-0.020,0.005
treatment_1,-0.0021,0.017,-0.124,0.901,-0.036,0.032
treatment_period:treatment_1,0.0117,0.016,0.712,0.477,-0.021,0.044

0,1,2,3
Omnibus:,301.505,Durbin-Watson:,1.67
Prob(Omnibus):,0.0,Jarque-Bera (JB):,74618.819
Skew:,9.582,Prob(JB):,0.0
Kurtosis:,107.724,Cond. No.,15.5


In [109]:
treat_1_water = two_time_points[["raion", "region", "treatment_1", "treatment_period", "water_grid_fixed"]].copy()
treat_1_water.dropna(inplace=True)

water = sm_f.ols(formula="water_grid_fixed ~ treatment_period*treatment_1 + C(region)", data=treat_1_water).fit(cov_type="HC3")
water.summary()

0,1,2,3
Dep. Variable:,water_grid_fixed,R-squared:,0.086
Model:,OLS,Adj. R-squared:,0.036
Method:,Least Squares,F-statistic:,4.037
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.000228
Time:,01:33:22,Log-Likelihood:,233.4
No. Observations:,158,AIC:,-448.8
Df Residuals:,149,BIC:,-421.2
Df Model:,8,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0104,0.004,2.960,0.003,0.004,0.017
C(region)[T.Мурманская область],0.0528,0.038,1.403,0.161,-0.021,0.127
C(region)[T.Новгородская область],0.0018,0.008,0.238,0.812,-0.013,0.017
C(region)[T.Приморский край],0.0133,0.008,1.727,0.084,-0.002,0.028
C(region)[T.Республика Алтай],0.0030,0.006,0.500,0.617,-0.009,0.015
C(region)[T.Хабаровский край],0.0230,0.007,3.370,0.001,0.010,0.036
treatment_period,-0.0097,0.014,-0.706,0.480,-0.037,0.017
treatment_1,0.0067,0.013,0.512,0.609,-0.019,0.033
treatment_period:treatment_1,0.0075,0.039,0.195,0.845,-0.068,0.083

0,1,2,3
Omnibus:,292.706,Durbin-Watson:,1.812
Prob(Omnibus):,0.0,Jarque-Bera (JB):,65731.604
Skew:,9.053,Prob(JB):,0.0
Kurtosis:,101.269,Cond. No.,17.1


# Fin t-1

In [110]:
with open("D:/DZ/Course_6/Diploma/Data/financial_data.csv", "rb") as inp:
    temp_fin = pickle.load(inp)

In [111]:
with open("D:/DZ/Course_6/Diploma/Data/mun_data_75.csv", "rb") as inp:
    data_for_models = pd.read_csv(inp, index_col=0).reset_index(drop=True)

In [112]:
mapping = {"Приморский край": 2016, "Мурманская область": 2016, "Костромская область": 2015, "Республика Алтай": 2014, "Республика Марий Эл": 2014, "Хабаровский край": 2014, 
           "Тульская область": 2014, "Новгородская область": 2016}

In [113]:
actual, initial, difference = temp_fin.loc[temp_fin.budget_type == "actual_spending"], temp_fin.loc[temp_fin.budget_type == "initial_budget"], temp_fin.loc[temp_fin.budget_type == "разница"]

In [114]:
actual = pd.pivot(actual, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()
initial = pd.pivot(initial, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()
difference = pd.pivot(difference, index = ["region", "raion", "year"], columns="sub_indicator", values="values").reset_index()

In [115]:
for i in actual, initial, difference:
    i.rename(columns={"дорожное хозяйство (дорожные фонды)": "roads", "жилищно-коммунальное хозяйство": "housing", "образование": "education"}, inplace=True)

In [116]:
actual.head()

sub_indicator,region,raion,year,roads,housing,education
0,Костромская область,Антроповский район,2013,666.115702,1610.368144,12882.945154
1,Костромская область,Антроповский район,2014,603.232937,1922.79625,12638.11448
2,Костромская область,Антроповский район,2015,688.90871,95.255228,21901.154847
3,Костромская область,Антроповский район,2016,764.043811,203.425719,10986.12319
4,Костромская область,Антроповский район,2017,,,


In [117]:
frames = [actual, initial, difference]

In [118]:
for i in range(len(frames)):
    temp_fin_data = pd.DataFrame()
    for j in mapping.keys():
        temp_fin_data = temp_fin_data.append(frames[i].loc[(frames[i].region == j) & ((frames[i].year == mapping[j]) | (frames[i].year == mapping[j] + 4))])
    temp_fin_data["treatment_period"] = [1 if i > 2018 else 0 for i in temp_fin_data.year]
    frames[i] = pd.merge(temp_fin_data, data_for_models, on=["region", "raion"], how="inner")

In [119]:
#frames[0]

In [120]:
actual, initial, difference = frames

### Models for actual budgets

In [121]:
actual_roads_data = actual[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
actual_roads_data.dropna(inplace=True)
actual_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=actual_roads_data).fit(cov_type="cluster", cov_kwds={'groups': actual_roads_data['region']})
actual_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.017
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,3.365
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.0843
Time:,01:33:36,Log-Likelihood:,-2314.8
No. Observations:,268,AIC:,4638.0
Df Residuals:,264,BIC:,4652.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1123.2335,176.809,6.353,0.000,776.695,1469.772
treatment_period,382.1249,269.123,1.420,0.156,-145.347,909.597
treatment_1,-238.1889,267.236,-0.891,0.373,-761.963,285.585
treatment_period:treatment_1,392.2147,301.766,1.300,0.194,-199.237,983.666

0,1,2,3
Omnibus:,116.954,Durbin-Watson:,1.819
Prob(Omnibus):,0.0,Jarque-Bera (JB):,369.227
Skew:,1.971,Prob(JB):,6.659999999999999e-81
Kurtosis:,7.187,Cond. No.,13.2


In [122]:
actual_roads_data = actual[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
actual_roads_data.dropna(inplace=True)
actual_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=actual_roads_data).fit(cov_type="HC3")
actual_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.147
Model:,OLS,Adj. R-squared:,0.114
Method:,Least Squares,F-statistic:,6.362
Date:,"Thu, 11 May 2023",Prob (F-statistic):,9.82e-09
Time:,01:33:36,Log-Likelihood:,-2295.7
No. Observations:,268,AIC:,4613.0
Df Residuals:,257,BIC:,4653.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1181.7281,278.011,4.251,0.000,636.836,1726.620
C(region)[T.Мурманская область],884.5304,609.377,1.452,0.147,-309.827,2078.888
C(region)[T.Новгородская область],-908.7459,315.907,-2.877,0.004,-1527.912,-289.580
C(region)[T.Приморский край],-460.7539,286.823,-1.606,0.108,-1022.916,101.408
C(region)[T.Республика Алтай],611.2868,454.075,1.346,0.178,-278.684,1501.258
C(region)[T.Республика Марий Эл],-398.9517,309.980,-1.287,0.198,-1006.501,208.597
C(region)[T.Тульская область],474.3172,399.602,1.187,0.235,-308.888,1257.522
C(region)[T.Хабаровский край],137.6469,366.523,0.376,0.707,-580.726,856.020
treatment_period,775.5766,215.432,3.600,0.000,353.338,1197.815

0,1,2,3
Omnibus:,119.495,Durbin-Watson:,2.036
Prob(Omnibus):,0.0,Jarque-Bera (JB):,419.676
Skew:,1.955,Prob(JB):,7.39e-92
Kurtosis:,7.722,Cond. No.,14.2


In [123]:
actual_housing_data = actual[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
actual_housing_data.dropna(inplace=True)
actual_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=actual_housing_data).fit(cov_type="cluster", cov_kwds={'groups': actual_housing_data['region']})
actual_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.017
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,4.727
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.0416
Time:,01:33:37,Log-Likelihood:,-2821.8
No. Observations:,265,AIC:,5652.0
Df Residuals:,261,BIC:,5666.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,4574.6650,2764.324,1.655,0.098,-843.310,9992.640
treatment_period,-2735.5662,2791.523,-0.980,0.327,-8206.850,2735.718
treatment_1,-3476.5297,2749.550,-1.264,0.206,-8865.548,1912.488
treatment_period:treatment_1,2078.9756,2771.834,0.750,0.453,-3353.719,7511.670

0,1,2,3
Omnibus:,363.291,Durbin-Watson:,1.119
Prob(Omnibus):,0.0,Jarque-Bera (JB):,27351.377
Skew:,6.514,Prob(JB):,0.0
Kurtosis:,51.035,Cond. No.,13.1


In [124]:
actual_housing_data = actual[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
actual_housing_data.dropna(inplace=True)
actual_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=actual_housing_data).fit(cov_type="HC3")
actual_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.26
Model:,OLS,Adj. R-squared:,0.231
Method:,Least Squares,F-statistic:,9.294
Date:,"Thu, 11 May 2023",Prob (F-statistic):,4.17e-13
Time:,01:33:37,Log-Likelihood:,-2784.1
No. Observations:,265,AIC:,5590.0
Df Residuals:,254,BIC:,5630.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,2395.1694,504.064,4.752,0.000,1407.222,3383.117
C(region)[T.Мурманская область],901.4127,702.726,1.283,0.200,-475.905,2278.730
C(region)[T.Новгородская область],-2378.2420,537.732,-4.423,0.000,-3432.177,-1324.307
C(region)[T.Приморский край],-1436.7470,497.850,-2.886,0.004,-2412.515,-460.979
C(region)[T.Республика Алтай],86.0359,638.586,0.135,0.893,-1165.570,1337.642
C(region)[T.Республика Марий Эл],-1092.8545,532.564,-2.052,0.040,-2136.662,-49.047
C(region)[T.Тульская область],163.5663,659.533,0.248,0.804,-1129.095,1456.227
C(region)[T.Хабаровский край],1.398e+04,3869.044,3.615,0.000,6401.680,2.16e+04
treatment_period,872.2052,247.323,3.527,0.000,387.462,1356.949

0,1,2,3
Omnibus:,340.594,Durbin-Watson:,1.433
Prob(Omnibus):,0.0,Jarque-Bera (JB):,24298.287
Skew:,5.793,Prob(JB):,0.0
Kurtosis:,48.457,Cond. No.,14.2


In [125]:
actual_education_data = actual[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
actual_education_data.dropna(inplace=True)
actual_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=actual_education_data).fit(cov_type="cluster", cov_kwds={'groups': actual_education_data['region']})
actual_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.011
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,15.83
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.00168
Time:,01:33:38,Log-Likelihood:,-2812.7
No. Observations:,268,AIC:,5633.0
Df Residuals:,264,BIC:,5648.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.543e+04,2902.424,5.317,0.000,9744.946,2.11e+04
treatment_period,-1495.2909,2948.245,-0.507,0.612,-7273.745,4283.163
treatment_1,-2338.0992,3068.343,-0.762,0.446,-8351.940,3675.742
treatment_period:treatment_1,9454.2181,3047.838,3.102,0.002,3480.566,1.54e+04

0,1,2,3
Omnibus:,173.4,Durbin-Watson:,0.834
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1394.414
Skew:,2.595,Prob(JB):,1.61e-303
Kurtosis:,12.897,Cond. No.,13.2


In [126]:
actual_education_data = actual[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
actual_education_data.dropna(inplace=True)
actual_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=actual_education_data).fit(cov_type="HC3")
actual_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.443
Model:,OLS,Adj. R-squared:,0.422
Method:,Least Squares,F-statistic:,43.84
Date:,"Thu, 11 May 2023",Prob (F-statistic):,5.6e-50
Time:,01:33:38,Log-Likelihood:,-2735.7
No. Observations:,268,AIC:,5493.0
Df Residuals:,257,BIC:,5533.0
Df Model:,10,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.634e+04,1088.834,15.007,0.000,1.42e+04,1.85e+04
C(region)[T.Мурманская область],3075.3092,1250.131,2.460,0.014,625.097,5525.522
C(region)[T.Новгородская область],-7304.2636,1267.442,-5.763,0.000,-9788.405,-4820.122
C(region)[T.Приморский край],-4952.8890,1188.660,-4.167,0.000,-7282.621,-2623.157
C(region)[T.Республика Алтай],4273.5564,1649.335,2.591,0.010,1040.919,7506.194
C(region)[T.Республика Марий Эл],-7377.2090,1143.960,-6.449,0.000,-9619.330,-5135.088
C(region)[T.Тульская область],-5747.1358,1136.721,-5.056,0.000,-7975.067,-3519.205
C(region)[T.Хабаровский край],9750.0629,2642.001,3.690,0.000,4571.836,1.49e+04
treatment_period,2191.2738,765.865,2.861,0.004,690.206,3692.342

0,1,2,3
Omnibus:,169.232,Durbin-Watson:,1.254
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1860.007
Skew:,2.363,Prob(JB):,0.0
Kurtosis:,15.01,Cond. No.,14.2


### Models for initial budgets

In [127]:
initial_roads_data = initial[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
initial_roads_data.dropna(inplace=True)
initial_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=initial_roads_data).fit(cov_type="cluster", cov_kwds={'groups': initial_roads_data['region']})
initial_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.027
Model:,OLS,Adj. R-squared:,0.013
Method:,Least Squares,F-statistic:,4.734
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.0505
Time:,01:33:39,Log-Likelihood:,-2097.8
No. Observations:,225,AIC:,4204.0
Df Residuals:,221,BIC:,4217.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,547.7195,99.090,5.527,0.000,353.506,741.933
treatment_period,944.3923,546.629,1.728,0.084,-126.981,2015.766
treatment_1,99.7108,151.879,0.657,0.511,-197.967,397.389
treatment_period:treatment_1,-994.9257,449.761,-2.212,0.027,-1876.441,-113.410

0,1,2,3
Omnibus:,466.871,Durbin-Watson:,2.029
Prob(Omnibus):,0.0,Jarque-Bera (JB):,296794.782
Skew:,12.678,Prob(JB):,0.0
Kurtosis:,179.111,Cond. No.,9.87


In [128]:
initial_roads_data = initial[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
initial_roads_data.dropna(inplace=True)
initial_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=initial_roads_data).fit(cov_type="HC3")
initial_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.053
Model:,OLS,Adj. R-squared:,0.013
Method:,Least Squares,F-statistic:,3.759
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.000205
Time:,01:33:39,Log-Likelihood:,-2094.7
No. Observations:,225,AIC:,4209.0
Df Residuals:,215,BIC:,4244.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1183.7209,727.247,1.628,0.104,-241.658,2609.100
C(region)[T.Мурманская область],-282.3143,1043.848,-0.270,0.787,-2328.219,1763.590
C(region)[T.Новгородская область],-1323.6755,973.388,-1.360,0.174,-3231.480,584.129
C(region)[T.Приморский край],-854.0444,925.828,-0.922,0.356,-2668.634,960.546
C(region)[T.Республика Алтай],-804.3847,733.160,-1.097,0.273,-2241.353,632.583
C(region)[T.Тульская область],-232.1603,717.600,-0.324,0.746,-1638.630,1174.310
C(region)[T.Хабаровский край],-571.2672,742.189,-0.770,0.441,-2025.931,883.396
treatment_period,986.5863,501.292,1.968,0.049,4.072,1969.100
treatment_1,-226.1856,385.278,-0.587,0.557,-981.316,528.945

0,1,2,3
Omnibus:,463.986,Durbin-Watson:,2.092
Prob(Omnibus):,0.0,Jarque-Bera (JB):,287145.939
Skew:,12.517,Prob(JB):,0.0
Kurtosis:,176.211,Cond. No.,11.0


In [129]:
initial_housing_data = initial[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
initial_housing_data.dropna(inplace=True)
initial_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=initial_housing_data).fit(cov_type="cluster", cov_kwds={'groups': initial_housing_data['region']})
initial_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.004
Model:,OLS,Adj. R-squared:,-0.009
Method:,Least Squares,F-statistic:,3.937
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.0722
Time:,01:33:39,Log-Likelihood:,-2554.3
No. Observations:,232,AIC:,5117.0
Df Residuals:,228,BIC:,5130.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,4246.6475,2819.847,1.506,0.132,-1280.150,9773.445
treatment_period,-509.0605,3035.187,-0.168,0.867,-6457.918,5439.797
treatment_1,-4048.7080,2820.327,-1.436,0.151,-9576.448,1479.032
treatment_period:treatment_1,632.3432,3046.229,0.208,0.836,-5338.155,6602.842

0,1,2,3
Omnibus:,360.121,Durbin-Watson:,1.626
Prob(Omnibus):,0.0,Jarque-Bera (JB):,49697.224
Skew:,7.581,Prob(JB):,0.0
Kurtosis:,73.08,Cond. No.,10.2


In [130]:
initial_housing_data = initial[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
initial_housing_data.dropna(inplace=True)
initial_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=initial_housing_data).fit(cov_type="HC3")
initial_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.101
Model:,OLS,Adj. R-squared:,0.065
Method:,Least Squares,F-statistic:,4.167
Date:,"Thu, 11 May 2023",Prob (F-statistic):,5.51e-05
Time:,01:33:40,Log-Likelihood:,-2542.4
No. Observations:,232,AIC:,5105.0
Df Residuals:,222,BIC:,5139.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,5620.3871,4403.290,1.276,0.202,-3009.903,1.43e+04
C(region)[T.Мурманская область],-4468.2168,5196.974,-0.860,0.390,-1.47e+04,5717.665
C(region)[T.Новгородская область],-6449.0298,5117.758,-1.260,0.208,-1.65e+04,3581.591
C(region)[T.Приморский край],-4842.3923,4998.300,-0.969,0.333,-1.46e+04,4954.095
C(region)[T.Республика Алтай],-4871.6669,4406.142,-1.106,0.269,-1.35e+04,3764.213
C(region)[T.Тульская область],-4438.9696,4310.985,-1.030,0.303,-1.29e+04,4010.405
C(region)[T.Хабаровский край],7709.8584,5636.203,1.368,0.171,-3336.897,1.88e+04
treatment_period,2175.6238,2048.205,1.062,0.288,-1838.783,6190.031
treatment_1,-1387.0475,840.613,-1.650,0.099,-3034.619,260.524

0,1,2,3
Omnibus:,362.918,Durbin-Watson:,1.813
Prob(Omnibus):,0.0,Jarque-Bera (JB):,56951.291
Skew:,7.621,Prob(JB):,0.0
Kurtosis:,78.228,Cond. No.,11.3


In [131]:
initial_education_data = initial[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
initial_education_data.dropna(inplace=True)
initial_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=initial_education_data).fit(cov_type="cluster", cov_kwds={'groups': initial_education_data['region']})
initial_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.017
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,1.388
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.334
Time:,01:33:40,Log-Likelihood:,-2524.1
No. Observations:,241,AIC:,5056.0
Df Residuals:,237,BIC:,5070.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.364e+04,2733.970,4.989,0.000,8281.993,1.9e+04
treatment_period,-1269.8032,2821.035,-0.450,0.653,-6798.931,4259.324
treatment_1,-3772.8362,3412.868,-1.105,0.269,-1.05e+04,2916.263
treatment_period:treatment_1,9292.4458,4836.280,1.921,0.055,-186.488,1.88e+04

0,1,2,3
Omnibus:,140.58,Durbin-Watson:,0.859
Prob(Omnibus):,0.0,Jarque-Bera (JB):,928.275
Skew:,2.295,Prob(JB):,2.68e-202
Kurtosis:,11.448,Cond. No.,10.1


In [132]:
initial_education_data = initial[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
initial_education_data.dropna(inplace=True)
initial_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=initial_education_data).fit(cov_type="HC3")
initial_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.4
Model:,OLS,Adj. R-squared:,0.376
Method:,Least Squares,F-statistic:,36.87
Date:,"Thu, 11 May 2023",Prob (F-statistic):,5.23e-40
Time:,01:33:40,Log-Likelihood:,-2464.6
No. Observations:,241,AIC:,4949.0
Df Residuals:,231,BIC:,4984.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,7044.8483,671.891,10.485,0.000,5727.966,8361.730
C(region)[T.Мурманская область],1.331e+04,891.260,14.932,0.000,1.16e+04,1.51e+04
C(region)[T.Новгородская область],7.3530,873.394,0.008,0.993,-1704.468,1719.174
C(region)[T.Приморский край],4125.4623,951.145,4.337,0.000,2261.253,5989.672
C(region)[T.Республика Алтай],7618.5069,1271.566,5.991,0.000,5126.284,1.01e+04
C(region)[T.Тульская область],2292.8408,965.454,2.375,0.018,400.586,4185.095
C(region)[T.Хабаровский край],1.472e+04,2510.484,5.863,0.000,9798.382,1.96e+04
treatment_period,1996.7984,623.602,3.202,0.001,774.561,3219.036
treatment_1,621.9937,2130.742,0.292,0.770,-3554.183,4798.171

0,1,2,3
Omnibus:,133.856,Durbin-Watson:,1.229
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1338.08
Skew:,1.97,Prob(JB):,2.75e-291
Kurtosis:,13.851,Cond. No.,11.0


### Models for difference

In [133]:
difference_roads_data = difference[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
difference_roads_data.dropna(inplace=True)
difference_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1", data=difference_roads_data).fit(cov_type="cluster", cov_kwds={'groups': difference_roads_data['region']})
difference_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.002
Model:,OLS,Adj. R-squared:,-0.013
Method:,Least Squares,F-statistic:,8.297
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.0148
Time:,01:33:41,Log-Likelihood:,-1689.4
No. Observations:,200,AIC:,3387.0
Df Residuals:,196,BIC:,3400.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,539.2748,136.744,3.944,0.000,271.261,807.289
treatment_period,12.3894,121.326,0.102,0.919,-225.406,250.185
treatment_1,-51.5161,357.716,-0.144,0.885,-752.626,649.594
treatment_period:treatment_1,545.4410,354.711,1.538,0.124,-149.780,1240.662

0,1,2,3
Omnibus:,151.581,Durbin-Watson:,2.038
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1370.924
Skew:,2.964,Prob(JB):,2.03e-298
Kurtosis:,14.375,Cond. No.,12.0


In [134]:
difference_roads_data = difference[["raion", "region", "treatment_1", "treatment_period", "roads"]].copy()
difference_roads_data.dropna(inplace=True)
difference_roads = sm_f.ols(formula="roads ~ treatment_period*treatment_1 + C(region)", data=difference_roads_data).fit(cov_type="HC3")
difference_roads.summary()

0,1,2,3
Dep. Variable:,roads,R-squared:,0.075
Model:,OLS,Adj. R-squared:,0.031
Method:,Least Squares,F-statistic:,3.312
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.000887
Time:,01:33:41,Log-Likelihood:,-1681.8
No. Observations:,200,AIC:,3384.0
Df Residuals:,190,BIC:,3417.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,968.7519,320.386,3.024,0.002,340.806,1596.697
C(region)[T.Мурманская область],-403.4435,523.964,-0.770,0.441,-1430.394,623.507
C(region)[T.Новгородская область],-839.7123,332.220,-2.528,0.011,-1490.852,-188.572
C(region)[T.Приморский край],-761.4154,306.220,-2.487,0.013,-1361.595,-161.236
C(region)[T.Республика Алтай],-657.4333,347.200,-1.894,0.058,-1337.933,23.066
C(region)[T.Тульская область],64.9155,498.104,0.130,0.896,-911.349,1041.180
C(region)[T.Хабаровский край],-293.5854,370.528,-0.792,0.428,-1019.807,432.637
treatment_period,271.3557,172.278,1.575,0.115,-66.304,609.015
treatment_1,-367.6594,464.397,-0.792,0.429,-1277.861,542.542

0,1,2,3
Omnibus:,150.497,Durbin-Watson:,2.11
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1390.332
Skew:,2.921,Prob(JB):,1.24e-302
Kurtosis:,14.52,Cond. No.,13.3


In [135]:
difference_housing_data = difference[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
difference_housing_data.dropna(inplace=True)
difference_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1", data=difference_housing_data).fit(cov_type="cluster", cov_kwds={'groups': difference_housing_data['region']})
difference_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.011
Model:,OLS,Adj. R-squared:,-0.004
Method:,Least Squares,F-statistic:,6.688
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.0243
Time:,01:33:42,Log-Likelihood:,-2069.6
No. Observations:,205,AIC:,4147.0
Df Residuals:,201,BIC:,4161.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1261.1120,737.907,1.709,0.087,-185.158,2707.382
treatment_period,-1377.0872,916.893,-1.502,0.133,-3174.165,419.991
treatment_1,-472.2348,741.358,-0.637,0.524,-1925.269,980.799
treatment_period:treatment_1,606.4174,1002.841,0.605,0.545,-1359.116,2571.950

0,1,2,3
Omnibus:,159.348,Durbin-Watson:,2.169
Prob(Omnibus):,0.0,Jarque-Bera (JB):,13733.327
Skew:,-2.122,Prob(JB):,0.0
Kurtosis:,42.872,Cond. No.,12.3


In [136]:
difference_housing_data = difference[["raion", "region", "treatment_1", "treatment_period", "housing"]].copy()
difference_housing_data.dropna(inplace=True)
difference_housing = sm_f.ols(formula="housing ~ treatment_period*treatment_1 + C(region)", data=difference_housing_data).fit(cov_type="HC3")
difference_housing.summary()

0,1,2,3
Dep. Variable:,housing,R-squared:,0.053
Model:,OLS,Adj. R-squared:,0.009
Method:,Least Squares,F-statistic:,3.725
Date:,"Thu, 11 May 2023",Prob (F-statistic):,0.000244
Time:,01:33:42,Log-Likelihood:,-2065.2
No. Observations:,205,AIC:,4150.0
Df Residuals:,195,BIC:,4184.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-530.8133,3206.885,-0.166,0.869,-6816.191,5754.565
C(region)[T.Мурманская область],1126.2763,3040.164,0.370,0.711,-4832.335,7084.888
C(region)[T.Новгородская область],785.6256,2993.918,0.262,0.793,-5082.345,6653.596
C(region)[T.Приморский край],283.5824,3038.197,0.093,0.926,-5671.174,6238.339
C(region)[T.Республика Алтай],2644.8684,3241.849,0.816,0.415,-3709.039,8998.776
C(region)[T.Тульская область],1678.1076,3127.615,0.537,0.592,-4451.905,7808.121
C(region)[T.Хабаровский край],3877.8869,3525.329,1.100,0.271,-3031.632,1.08e+04
treatment_period,-121.9030,896.380,-0.136,0.892,-1878.775,1634.969
treatment_1,137.9337,660.574,0.209,0.835,-1156.768,1432.636

0,1,2,3
Omnibus:,166.316,Durbin-Watson:,2.276
Prob(Omnibus):,0.0,Jarque-Bera (JB):,12376.787
Skew:,-2.343,Prob(JB):,0.0
Kurtosis:,40.776,Cond. No.,13.6


In [137]:
difference_education_data = difference[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
difference_education_data.dropna(inplace=True)
difference_education = sm_f.ols(formula="education ~ treatment_period*treatment_1", data=difference_education_data).fit(cov_type="cluster", cov_kwds={'groups': difference_education_data['region']})
difference_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.043
Model:,OLS,Adj. R-squared:,0.03
Method:,Least Squares,F-statistic:,208.4
Date:,"Thu, 11 May 2023",Prob (F-statistic):,1.87e-06
Time:,01:33:43,Log-Likelihood:,-2086.2
No. Observations:,212,AIC:,4180.0
Df Residuals:,208,BIC:,4194.0
Df Model:,3,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3096.9725,1093.742,2.832,0.005,953.277,5240.668
treatment_period,-980.1790,764.955,-1.281,0.200,-2479.464,519.106
treatment_1,111.1524,860.828,0.129,0.897,-1576.039,1798.344
treatment_period:treatment_1,-8299.1550,1498.370,-5.539,0.000,-1.12e+04,-5362.405

0,1,2,3
Omnibus:,69.11,Durbin-Watson:,1.24
Prob(Omnibus):,0.0,Jarque-Bera (JB):,164.562
Skew:,1.488,Prob(JB):,1.84e-36
Kurtosis:,6.126,Cond. No.,12.3


In [138]:
difference_education_data = difference[["raion", "region", "treatment_1", "treatment_period", "education"]].copy()
difference_education_data.dropna(inplace=True)
difference_education = sm_f.ols(formula="education ~ treatment_period*treatment_1 + C(region)", data=difference_education_data).fit(cov_type="HC3")
difference_education.summary()

0,1,2,3
Dep. Variable:,education,R-squared:,0.322
Model:,OLS,Adj. R-squared:,0.292
Method:,Least Squares,F-statistic:,13.78
Date:,"Thu, 11 May 2023",Prob (F-statistic):,3.47e-17
Time:,01:33:43,Log-Likelihood:,-2049.6
No. Observations:,212,AIC:,4119.0
Df Residuals:,202,BIC:,4153.0
Df Model:,9,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,8461.5759,1014.785,8.338,0.000,6472.633,1.05e+04
C(region)[T.Мурманская область],-7676.5772,1024.600,-7.492,0.000,-9684.757,-5668.398
C(region)[T.Новгородская область],-6616.1975,1110.728,-5.957,0.000,-8793.185,-4439.211
C(region)[T.Приморский край],-8149.9802,1014.834,-8.031,0.000,-1.01e+04,-6160.941
C(region)[T.Республика Алтай],-3133.7405,1249.516,-2.508,0.012,-5582.747,-684.734
C(region)[T.Тульская область],-7227.9707,1100.413,-6.568,0.000,-9384.741,-5071.201
C(region)[T.Хабаровский край],-4493.0881,1413.201,-3.179,0.001,-7262.911,-1723.265
treatment_period,471.8610,573.910,0.822,0.411,-652.982,1596.704
treatment_1,713.3275,1947.714,0.366,0.714,-3104.123,4530.778

0,1,2,3
Omnibus:,89.732,Durbin-Watson:,1.562
Prob(Omnibus):,0.0,Jarque-Bera (JB):,455.671
Skew:,1.581,Prob(JB):,1.13e-99
Kurtosis:,9.448,Cond. No.,13.5
