In [2]:
import pandas as pd
import statsmodels.formula.api as smf
import numpy as np
from statsmodels.iolib.summary2 import summary_col

# Load the processed panel workbook into a DataFrame.
data = pd.read_excel(r"C:\Users\13298\Desktop\project\data\processed\panel.xlsx")

# past_experience is the gap `last - now`; the dummy is 1 only where that gap is
# strictly positive (a missing gap compares False and therefore yields 0).
data['past_experience'] = data['last'].sub(data['now'])
data['past_experience_dummy'] = data['past_experience'].gt(0).astype(int)

def simplify_pattern(pattern):
    """Collapse every run of identical consecutive characters to a single character.

    Example: "0011100" becomes "010". An empty string is returned unchanged.
    """
    pieces = []
    last_seen = None
    for symbol in pattern:
        if symbol == last_seen:
            # Still inside the current run: nothing new to record.
            continue
        pieces.append(symbol)
        last_seen = symbol
    return "".join(pieces)

# Build each city's treatment path as a string of 0/1 flags, one character per row.
# Rows are ordered by year first so the string is chronological regardless of the row
# order in the workbook (the later trimming step also works on year-sorted rows).
# The stable sort keeps the existing order among rows that share a year.
grouped = (
    data.sort_values('year', kind='stable')
    .groupby('city')['past_experience_dummy']
    .apply(lambda flags: ''.join(map(str, flags)))
)

pattern_df = grouped.reset_index(name='original_pattern')

# Collapse repeated flags so that e.g. "000111" and "0011" both become "01".
pattern_df['simplified_pattern'] = pattern_df['original_pattern'].apply(simplify_pattern)

# Map every simplified pattern to the list of cities that follow it.
pattern_dict = {
    pattern: group['city'].tolist()
    for pattern, group in pattern_df.groupby('simplified_pattern')
}

for pattern, cities in pattern_dict.items():
    print(f"变化模式: {pattern}")
    print("涉及的城市:", cities)
    print()

# Cities whose simplified treatment path is irregular. Their offending edge run of
# treated rows is dropped below: the trailing run for "0101", the leading run for
# "101" and "1010".
pattern_0101_city = ['温州市']
pattern_101_cities = ['东莞市', '东营市', '临汾市', '丹东市', '九江市', '南京市', '大连市', '天水市', '娄底市', '德阳市', '新乡市', '桂林市', '永州市', '泰安市', '泰州市', '珠海市', '益阳市', '菏泽市', '衡水市', '衢州市', '郑州市', '铁岭市', '长治市', '随州市', '马鞍山市', '黄冈市', '齐齐哈尔市']
pattern_1010_cities = ['新余市', '晋中市', '朔州市', '松原市', '烟台市', '盘锦市', '锦州市']


def _edge_treated_labels(city_rows, side):
    """Return index labels of the run of treated rows at one end of a city's history.

    Parameters
    ----------
    city_rows : DataFrame
        Rows of a single city; must contain 'year' and 'past_experience_dummy'.
    side : str
        'leading' selects the run at the earliest years, 'trailing' the run at
        the latest years.

    Returns
    -------
    list
        Index labels of the rows in that run (empty when the edge row is not treated
        or when city_rows is empty).
    """
    ordered = city_rows.sort_values('year')
    flags = ordered['past_experience_dummy'].tolist()
    if side == 'trailing':
        flags = flags[::-1]

    # Count consecutive treated rows starting from the chosen edge.
    run_length = 0
    for flag in flags:
        if flag != 1:
            break
        run_length += 1

    if run_length == 0:
        return []
    if side == 'trailing':
        return ordered.index[-run_length:].tolist()
    return ordered.index[:run_length].tolist()


# (city list, which edge run to remove)
trim_plan = (
    (pattern_0101_city, 'trailing'),
    (pattern_101_cities, 'leading'),
    (pattern_1010_cities, 'leading'),
)

rows_to_drop = []
for cities_to_trim, side in trim_plan:
    for city in cities_to_trim:
        # A city absent from the data yields an empty frame and contributes nothing.
        rows_to_drop.extend(_edge_treated_labels(data[data['city'] == city], side))

# Drop all collected rows in one pass; `data` itself is left untouched.
# Unlike a per-city concat, this keeps the original row order, which the
# per-city processing that follows does not depend on (it re-sorts by year).
filtered_data = data.drop(index=pd.Index(rows_to_drop).unique())

def _add_position_dummies(group, prefix, positions):
    """Add one 0/1 column per entry of `positions`, named `{prefix}1`, `{prefix}2`, ...

    Column `{prefix}{k}` is 1 on the row at the k-th position and 0 elsewhere.
    `group` is modified in place.
    """
    for k, pos in enumerate(positions, start=1):
        column = np.zeros(len(group), dtype='int64')
        column[pos] = 1
        group[f'{prefix}{k}'] = column


def filter_sequence(group):
    """Add event-time dummy columns to the rows of a single city.

    The rows are sorted by 'year'. Then:
      * if no row has past_experience_dummy == 1, every row with dummy == 0 gets its
        own `pre_treat{k}` column (k counts rows in chronological order);
      * otherwise rows with dummy == 0 before the first treated row get `pre_treat{k}`,
        treated rows get `treat{k}`, and rows with dummy == 0 after the last treated
        row get `post_treat{k}`. Untreated rows between two treated rows get no dummy.

    Rows are classified by their position in the year-sorted group, not by their index
    labels, so the result does not depend on how the frame happens to be indexed.

    Returns the year-sorted group with the added columns.
    """
    group = group.sort_values('year')
    flags = group['past_experience_dummy'].to_numpy()
    one_pos = np.flatnonzero(flags == 1)
    zero_pos = np.flatnonzero(flags == 0)

    if len(one_pos) == 0:
        # Never treated within the observed rows: all rows are numbered as pre-treatment.
        _add_position_dummies(group, 'pre_treat', zero_pos)
        return group

    # NOTE(review): pre_treat{k} counts forward from the city's first observed year, so
    # pre_treat1 is the earliest pre-treatment row, not the row just before treatment.
    # Confirm this matches how the columns are interpreted downstream.
    _add_position_dummies(group, 'pre_treat', zero_pos[zero_pos < one_pos[0]])
    _add_position_dummies(group, 'treat', one_pos)
    _add_position_dummies(group, 'post_treat', zero_pos[zero_pos > one_pos[-1]])
    return group

# Build the event dummies city by city and stack the results. Iterating over the groups
# explicitly keeps the 'city' column in every piece and avoids the pandas deprecation
# warning raised when groupby().apply() operates on the grouping column.
filtered_grouped = pd.concat(
    [filter_sequence(city_rows) for _, city_rows in filtered_data.groupby('city')],
    ignore_index=True,
)

# Cities have different numbers of pre/treat/post rows, so stacking leaves NaN in the
# dummy columns a city does not have; those mean "not flagged" and become 0.
# Only the event dummies are filled: a frame-wide fillna(0) would also turn genuinely
# missing values in other columns into zeros.
event_dummy_cols = [
    col for col in filtered_grouped.columns
    if col.startswith(('pre_treat', 'treat', 'post_treat'))
]
filtered_grouped[event_dummy_cols] = filtered_grouped[event_dummy_cols].fillna(0)

def print_variable_stats(group):
    """Print the column sum of every event-dummy column of `group`.

    Columns are reported in three passes by name prefix, in this order: 'treat',
    'pre_treat', 'post_treat'; within a pass the frame's column order is kept.
    For 0/1 dummies the sum is the number of flagged rows.

    Returns None.
    """
    for prefix in ('treat', 'pre_treat', 'post_treat'):
        for col in group.columns:
            if col.startswith(prefix):
                print(f"\n{col} 变量的数量: {group[col].sum()}")
# Report how many rows each event dummy flags.
print_variable_stats(filtered_grouped)

# Write the augmented panel to a workbook (without the row index) and confirm the path.
excel_file_path = r"C:\Users\13298\Desktop\project\data\processed\panel2.xlsx"
filtered_grouped.to_excel(excel_writer=excel_file_path, index=False)
print(f"数据已保存到 {excel_file_path}")

变化模式: 0
涉及的城市: ['上海市', '中山市', '临沧市', '丽江市', '六安市', '十堰市', '吉林市', '呼伦贝尔市', '哈尔滨市', '嘉兴市', '嘉峪关市', '宣城市', '崇左市', '巴彦淖尔市', '常州市', '广安市', '廊坊市', '延安市', '忻州市', '抚州市', '昆明市', '曲靖市', '来宾市', '杭州市', '汕头市', '济宁市', '海口市', '淮安市', '深圳市', '湛江市', '滨州市', '焦作市', '玉溪市', '白山市', '盐城市', '石嘴山市', '芜湖市', '莆田市', '营口市', '西安市', '郴州市', '钦州市', '铜陵市', '长沙市', '阜阳市', '防城港市', '陇南市', '雅安市', '青岛市']

变化模式: 01
涉及的城市: ['乌鲁木齐市', '亳州市', '佛山市', '佳木斯市', '保定市', '六盘水市', '南昌市', '南阳市', '厦门市', '双鸭山市', '合肥市', '周口市', '呼和浩特市', '咸宁市', '唐山市', '四平市', '太原市', '安康市', '岳阳市', '常德市', '平顶山市', '德州市', '怀化市', '扬州市', '揭阳市', '无锡市', '日照市', '昭通市', '晋城市', '柳州市', '武汉市', '汕尾市', '洛阳市', '济南市', '渭南市', '潍坊市', '石家庄市', '绍兴市', '绵阳市', '舟山市', '茂名市', '荆门市', '萍乡市', '葫芦岛市', '西宁市', '辽源市', '辽阳市', '鄂尔多斯市', '鄂州市', '酒泉市', '重庆市', '金昌市', '铜川市', '阜新市', '驻马店市']

变化模式: 010
涉及的城市: ['七台河市', '伊春市', '兰州市', '包头市', '大庆市', '宁波市', '宝鸡市', '宿州市', '成都市', '江门市', '沈阳市', '泉州市', '玉林市', '苏州市', '衡阳市', '贵港市', '运城市', '金华市', '长春市', '韶关市', '鹤壁市', '黄石市', '龙岩市']

变化模式: 0101
涉及的城市: ['温州市']

变化模式: 1


  filtered_grouped = filtered_data.groupby('city').apply(filter_sequence)



treat1 变量的数量: 226.0

treat2 变量的数量: 195.0

treat3 变量的数量: 147.0

treat4 变量的数量: 102.0

treat5 变量的数量: 70.0

treat6 变量的数量: 53.0

treat7 变量的数量: 45.0

pre_treat1 变量的数量: 162.0

pre_treat2 变量的数量: 144.0

pre_treat3 变量的数量: 116.0

pre_treat4 变量的数量: 97.0

pre_treat5 变量的数量: 71.0

pre_treat6 变量的数量: 53.0

pre_treat7 变量的数量: 49.0

post_treat1 变量的数量: 99.0

post_treat2 变量的数量: 86.0

post_treat3 变量的数量: 68.0

post_treat4 变量的数量: 41.0

post_treat5 变量的数量: 22.0

post_treat6 变量的数量: 8.0
数据已保存到 C:\Users\13298\Desktop\project\data\processed\panel2.xlsx


In [None]:
// Load the panel workbook; the first spreadsheet row holds the variable names.
import excel "C:\Users\13298\Desktop\project\data\processed\panel2.xlsx", firstrow clear

// Declare the panel: numeric city id by year.
encode city, gen(city_numeric)
sort city_numeric year
xtset city_numeric year

// Cities whose past_experience_dummy is never 1 form the control cohort.
bysort city: egen ever_treated = max(past_experience_dummy)
gen control_cohort = (ever_treated == 0)

// cohort() has to identify each city's first treatment period, not the calendar year of
// the observation. first_treat is the earliest year with past_experience_dummy == 1 and
// is missing for never-treated cities.
bysort city: egen first_treat = min(cond(past_experience_dummy == 1, year, .))

// Interaction-weighted event-study estimator; pre_treat1 is the omitted reference period.
// NOTE(review): no vce() option is given — confirm whether standard errors should be
// clustered (e.g. by city_numeric).
eventstudyinteract Efficiency1 ///
    pre_treat6 pre_treat5 pre_treat4 pre_treat3 pre_treat2 ///
    treat1 treat2 treat3 treat4 treat5 treat6 treat7 ///
    post_treat1 post_treat2 post_treat3 post_treat4 post_treat5 post_treat6, ///
    absorb(city_numeric year) cohort(first_treat) control_cohort(control_cohort) ///
    covariates(industry_structure Fiscal_autonomy population_log perGDP_log age male education)

// First, store the coefficient estimates (row 1 of C).
matrix C = e(b_iw)

// Then append the standard errors (square roots of the variance diagonal) as row 2.
mata st_matrix("A",sqrt(diagonal(st_matrix("e(V_iw)"))))
matrix C = C \ A'

// Plot the estimates with their confidence intervals in event-time order.
coefplot matrix(C[1]), se(C[2]) ///
    vertical ///
    yline(0, lcolor(red) lpattern(dash)) ///
    ciopts(color(black)) ///
    mcolor(black) ///
    xlabel(1 "t-6" 2 "t-5" 3 "t-4" 4 "t-3" 5 "t-2" ///
           6 "t1" 7 "t2" 8 "t3" 9 "t4" 10 "t5" 11 "t6" 12 "t7" ///
           13 "t+1" 14 "t+2" 15 "t+3" 16 "t+4" 17 "t+5" 18 "t+6") ///
    xlabel(,angle(45)) ///
    ylabel(,grid) ///
    title("系数估计图") ///
    xtitle("时间") ///
    ytitle("估计系数") ///
    graphregion(color(white)) ///
    plotregion(color(white)) ///
    p1(color(black))

: 