In [3]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

In [4]:
# Build the two-column working frame used by the imputation step:
#   factor_1 - general budget revenue (predictor)
#   target   - urban per-capita disposable income (value to be imputed)
# Only positional rows 60-79 of the workbook are kept.
# NOTE(review): presumably this window is one province's block of yearly
# rows - confirm against the sheet layout.
source_frame = pd.read_excel('山河四省.xlsx')
row_window = slice(60, 80)
data = dict(
    factor_1=source_frame['财政一般预算收入（亿元）'].iloc[row_window],
    target=source_frame['城镇居民人均可支配收入（元）'].iloc[row_window],
)
df = pd.DataFrame(data)

In [5]:
# Impute missing 'target' values with a one-variable linear regression on
# 'factor_1', writing the predictions back into df in place.

# Rows with an observed target train the model; rows without one are the
# candidates for imputation.
train_data = df[df['target'].notnull()]
test_data = df[df['target'].isnull()]

# Training rows must also have the predictor present.
train_data = train_data.dropna(subset=['factor_1'])
train_target = train_data['target']

# Fit target ~ factor_1.
model = LinearRegression()
model.fit(train_data[['factor_1']], train_target)

# A row can only be filled when its target is missing AND its predictor is
# present: the estimator rejects NaN input, so rows lacking both are left as
# missing instead of aborting the whole cell.
fillable = df['target'].isnull() & df['factor_1'].notnull()
if fillable.any():
    predicted_values = model.predict(df.loc[fillable, ['factor_1']])

    # The same mask selects the rows for prediction and for assignment, so
    # each prediction lands on the row it was computed from.
    df.loc[fillable, 'target'] = predicted_values

print(df['target'].to_string(index=False))

 7351.323504
 8013.493230
 8808.053112
 9840.900677
11175.822583
12247.350845
13102.413173
15930.260000
18194.800000
20442.600000
21740.700000
23672.100000
25575.600000
27232.920000
29557.860000
31874.190000
34200.970000
34750.340000
37094.800000
38483.740000


### 处理出生率的缺失值

In [6]:
# Reload the full workbook and take its last column by position
# (presumably the birth-rate column, per the section heading - confirm).
data = pd.read_excel('山河四省.xlsx')
data_chushenglv = data.iloc[:, -1]

# Fill gaps by linear interpolation (pandas' default method, spelled out
# here) and save the filled series to its own workbook.
# NOTE(review): interpolation runs down the whole column in row order; if the
# sheet stacks several provinces, values may be interpolated across a
# province boundary - confirm this is intended.
data1 = data_chushenglv.interpolate(method='linear')
data1.to_excel('出生率.xlsx')

### 查看数据的基本信息

In [7]:
# Reload the workbook so the summary below starts from the full sheet.
data = pd.read_excel('山河四省.xlsx')
# NOTE(review): tabulate is not used in the cells visible here - confirm it
# is needed further down.
from tabulate import tabulate
# Render floats with two decimal places in displayed frames.
pd.options.display.float_format = '{:.2f}'.format

In [8]:
# Summary statistics with one row per column of `data`; the statistic names
# produced by describe() are relabelled with the Chinese headers used in the
# report, and values are rounded to four decimals.
column_labels = {
    'count': '样本量',
    'mean': '均值',
    'std': '标准差',
    'min': '最小值',
    '25%': '第一四分位数',
    '50%': '中位数',
    '75%': '第三四分位数',
    'max': '最大值',
}
describe = data.describe().T.rename(columns=column_labels).round(4)

In [12]:
# Explicit import of display so the call below does not rely on the name
# being injected by the notebook environment.
from IPython.display import display

# Render the summary-statistics table using the frame's rich representation.
display(describe)

Unnamed: 0,样本量,均值,标准差,最小值,第一四分位数,中位数,第三四分位数,最大值
时间,80.0,2012.5,5.8,2003.0,2007.75,2012.5,2017.25,2022.0
人均地区生产总值（元）,80.0,37836.43,19732.86,7435.0,20547.25,35713.0,50246.17,86003.0
就业人员数（万人）,80.0,4062.48,1609.71,1469.47,2976.6,4340.86,5542.55,6041.56
财政一般预算收入（亿元）,80.0,2477.68,1828.02,186.05,964.15,2062.3,3570.14,7284.46
财政一般预算支出（亿元）,80.0,4708.21,3358.97,330.45,1793.38,4112.23,7246.42,12128.63
居民消费价格指数（上年=100）,80.0,102.53,1.65,99.32,101.52,102.1,103.03,107.19
城镇居民人均可支配收入（元）,80.0,22701.3,11796.26,3930.7,12074.54,22242.45,32149.94,49049.66
建成区面积（平方公里）,80.0,2287.65,1323.69,674.95,1268.44,1979.41,2907.36,5712.99
国家财政性教育经费（万元）,80.0,9496453.32,6852401.22,849910.2,3518874.92,7602681.72,14551795.46,26990467.0
年末参加生育保险人数（万人）,80.0,592.0,354.73,75.18,334.89,505.1,774.12,1646.89
