In [None]:
import pandas as pd

# Load the three raw inputs: the GDSP workbook, the GDSP/median-income
# historical workbook, and the world population-structure CSV.
gdsp_data = pd.read_excel('./GDSP.xlsx')
gdsp_historical_data = pd.read_excel('./GDSP-and-Median-Income-historical-AM23.xlsx')
world_population_data = pd.read_csv('./世界各国人口结构数据 2008-2021.csv', encoding='utf-8')

# For each dataset, in load order, print a heading followed by the first rows
# and the per-column dtypes.
for preview_heading, preview_frame in (
    ("GDSP 数据预览:", gdsp_data),
    ("\nGDSP 历史数据预览:", gdsp_historical_data),
    ("\n世界各国人口结构数据预览:", world_population_data),
):
    print(preview_heading)
    print(preview_frame.head())
    print(preview_frame.dtypes)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Reload the two datasets used by the analysis so this cell starts from the
# raw files rather than from whatever an earlier cell left behind.
gdsp_data = pd.read_excel('./GDSP.xlsx')
world_population_data = pd.read_csv('./世界各国人口结构数据 2008-2021.csv')

# 1. Preprocessing
# Fill missing values in numeric columns with that column's median.
# numeric_only=True restricts the median to numeric columns: without it,
# pandas >= 2.0 raises TypeError as soon as the frame contains a text column
# (both frames are later joined on a country-name column). Non-numeric columns
# are left untouched, exactly as the median-based fill intends.
gdsp_data = gdsp_data.fillna(gdsp_data.median(numeric_only=True))
world_population_data = world_population_data.fillna(
    world_population_data.median(numeric_only=True)
)

# Make sure the year column is integer typed. Any missing year was filled by
# the median above, so the cast cannot hit NaN.
world_population_data['year'] = world_population_data['year'].astype(int)

# 2. Exploratory data analysis
# Summary statistics of the numeric columns of each dataset.
gdsp_stats = gdsp_data.describe()
population_stats = world_population_data.describe()

# Print both summaries.
print("GDSP数据的描述性统计：")
print(gdsp_stats)
print("\n人口数据的描述性统计：")
print(population_stats)


In [None]:
# Use seaborn's white-grid look for the figures that follow.
sns.set(style="whitegrid")

# Attach the GDSP indicators to every population row. The inner join keeps
# only rows whose country name appears in both tables.
merged_data = pd.merge(world_population_data, gdsp_data, left_on='country_name', right_on='Country', how='inner')

# Pairwise correlations between the numeric columns only. The merged frame
# also carries text columns (the two country-name columns used as join keys),
# and DataFrame.corr() raises on those in pandas >= 2.0. Selecting the numeric
# columns first works on every pandas version and matches how the correlation
# matrix is computed later in this notebook.
correlation_analysis = merged_data.select_dtypes(include='number').corr()

# Scatter plot: annualized growth in mean consumption/income per capita
# against total population, one point per row of the merged data.
print("散点图：展示人均消费或收入的年化增长与人口总数之间的关系")
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=merged_data,
    x='population_total',
    y='Annualized growth in mean consumption or income per capita-Total Population',
    ax=scatter_ax,
)
scatter_ax.set_title('Annualized Growth in Mean Consumption or Income per Capita vs. Total Population')
scatter_ax.set_xlabel('Total Population')
scatter_ax.set_ylabel('Annualized Growth in Mean Consumption or Income per Capita')
plt.show()

# Line chart: for each selected country, draw the growth indicator and the
# old-age dependency ratio against the year, all on one shared axis.
print("折线图：展示选定国家的经济增长趋势与人口老龄化比例的变化")
trend_fig, trend_ax = plt.subplots(figsize=(12, 8))
countries = ['China', 'United States', 'India']
for country in countries:
    # Rows of the merged data that belong to this country.
    country_data = merged_data.loc[merged_data['country_name'] == country]
    year_axis = country_data['year']
    trend_ax.plot(
        year_axis,
        country_data['Annualized growth in mean consumption or income per capita-Total Population'],
        label=f'{country} - Economic Growth',
    )
    trend_ax.plot(
        year_axis,
        country_data['age_dependency_ratio_old'],
        label=f'{country} - Old Age Dependency Ratio',
    )
trend_ax.set_title('Economic Growth and Old Age Dependency Ratio (2016-2018)')
trend_ax.set_xlabel('Year')
trend_ax.set_ylabel('Value')
trend_ax.legend()
plt.show()

# Heat map of the correlation matrix, with each cell annotated by its value.
# NOTE(review): the printed caption mentions personality traits, while the
# matrix drawn here is `correlation_analysis` -- confirm the caption is intended.
print("热力图：分析不同国家的人格特质与社会经济指标的关系")
plt.figure(figsize=(12, 10))
annotated_heatmap_ax = sns.heatmap(correlation_analysis, cmap='coolwarm', annot=True)
annotated_heatmap_ax.set_title('Correlation Matrix of Economic and Population Indicators')
plt.show()

# Bubble chart: growth indicator (x) against the age dependency ratio (y).
# Marker area scales with total population and colour follows the country;
# the legend is suppressed.
print("气泡图：将人口数据与经济增长数据结合，显示人口结构的影响")
bubble_fig, bubble_ax = plt.subplots(figsize=(14, 8))
bubble_options = dict(
    x='Annualized growth in mean consumption or income per capita-Total Population',
    y='age_dependency_ratio',
    size='population_total',
    hue='country_name',
    sizes=(20, 2000),
    alpha=0.5,
    palette='muted',
    legend=False,
)
sns.scatterplot(data=merged_data, ax=bubble_ax, **bubble_options)
bubble_ax.set_title('Bubble Chart of Economic Growth and Age Dependency Ratio')
bubble_ax.set_xlabel('Annualized Growth in Mean Consumption or Income per Capita')
bubble_ax.set_ylabel('Age Dependency Ratio')
plt.show()

# Radar chart: compare the selected countries across several indicators.
print("雷达图：比较不同国家在多个维度（如人格特质、经济、社会指标）的表现")
# Rows for the countries of interest, and the indicators that form the radar axes.
selected_countries = merged_data[merged_data['country_name'].isin(countries)]
selected_indicators = ['Annualized growth in mean consumption or income per capita-Total Population', 
                       'population_growth', 'age_dependency_ratio', 'Baseline-Median $ a day (PPP)']

# Min-max scale every indicator to the [0, 1] range across the selected rows.
selected_data = selected_countries[selected_indicators].apply(lambda x: (x - x.min()) / (x.max() - x.min()))

# Re-attach the country name so rows can be grouped per country below.
selected_data['country_name'] = selected_countries['country_name']

# One angle per indicator; the first angle is repeated to close the polygon.
num_vars = len(selected_indicators)
angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
angles += angles[:1]

fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))

for country in selected_data['country_name'].unique():
    # Average the indicator columns only. Taking the mean of the whole frame
    # would include the text column 'country_name', which raises TypeError in
    # pandas >= 2.0, and `values` must hold exactly one entry per indicator to
    # line up with `angles`.
    country_rows = selected_data.loc[selected_data['country_name'] == country, selected_indicators]
    values = country_rows.mean().tolist()
    values += values[:1]  # close the polygon
    ax.plot(angles, values, label=country)
    ax.fill(angles, values, alpha=0.25)

# Hide the radial tick labels and put one labelled spoke per indicator.
ax.set_yticklabels([])
ax.set_xticks(angles[:-1])
ax.set_xticklabels(selected_indicators)
plt.title('Radar Chart of Key Indicators for Selected Countries')
plt.legend()
plt.show()


In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt

# Read the country shapefile and left-join the GDSP table onto it by name, so
# every shape is kept even when no GDSP row matches its NAME.
world = gpd.read_file('./ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp')
world = world.merge(gdsp_data, left_on='NAME', right_on='Country', how='left')

# Column used to colour the map.
indicator = 'Annualized growth in mean consumption or income per capita-Total Population'

# Draw the country outlines first, then the indicator-coloured shapes on the
# same axis, with a horizontal legend labelled by the indicator name.
fig, ax = plt.subplots(1, 1, figsize=(15, 10))
world.boundary.plot(ax=ax)
legend_options = {'label': f"{indicator}", 'orientation': "horizontal"}
world.plot(
    column=indicator,
    cmap='OrRd',
    legend=True,
    legend_kwds=legend_options,
    ax=ax,
)
ax.set_title('World Map Showing Annualized Growth in Mean Consumption or Income per Capita')
plt.show()


In [None]:
# Heat map of the correlation matrix, this time without per-cell annotations.
# NOTE(review): the printed caption mentions personality traits, while the
# matrix drawn here is `correlation_analysis` -- confirm the caption is intended.
print("热力图：分析不同国家的人格特质与社会经济指标的关系")
plt.figure(figsize=(12, 10))
plain_heatmap_ax = sns.heatmap(correlation_analysis, cmap='coolwarm')
plain_heatmap_ax.set_title('Correlation Matrix of Economic and Population Indicators')
plt.show()


In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the country shapefile.
world = gpd.read_file('./ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp')

# Reload the GDSP table from the raw workbook.
gdsp_data = pd.read_excel('./GDSP.xlsx')

# Print a few rows as a sanity check.
print("GDSP data sample:")
print(gdsp_data.head())

# Show how the United States is spelled in the GDSP table.
# na=False makes rows with a missing name count as "no match". Without it,
# str.contains returns NaN for those rows and the boolean indexing raises.
print("美国在经济数据中的名称:")
print(gdsp_data[gdsp_data['Country'].str.contains("United States", na=False)])

# Show how the United States is spelled in the map data (same NaN handling).
print("美国在地图数据中的名称:")
print(world[world['NAME'].str.contains("United States", na=False)])

# Mapping from country spellings that may occur in the GDSP table to the
# spelling expected in the map's NAME column (the check below compares against
# world['NAME']).
#
# Two clean-ups compared with a naive list:
#   * The entry "United States of America" -> "United States" is gone. Together
#     with "United States" -> "United States of America" it formed a swap, so a
#     name already in the map's spelling would have been renamed away from it.
#   * Entries that mapped a name to itself are gone, because they are no-ops in
#     Series.replace.
# NOTE(review): the target spellings are assumed to match the shapefile's NAME
# values. The "Unmapped countries" printout below is the way to verify this.
country_name_mapping = {
    "United States": "United States of America",
    "Russian Federation": "Russia",
    "Republic of Korea": "South Korea",
    "Iran (Islamic Republic of)": "Iran",
    "Viet Nam": "Vietnam",
    "United Republic of Tanzania": "Tanzania",
    "Côte d'Ivoire": "Ivory Coast",
    "Syrian Arab Republic": "Syria",
    "Lao People's Democratic Republic": "Laos",
    "Democratic Republic of the Congo": "Democratic Republic of Congo",
    "Republic of the Congo": "Congo",
    "Bolivia (Plurinational State of)": "Bolivia",
    "Brunei Darussalam": "Brunei",
    "Czechia": "Czech Republic",
    "Gambia": "The Gambia",
    "Hong Kong Special Administrative Region of China": "Hong Kong",
    "Moldova (Republic of)": "Moldova",
    "Korea (Democratic People's Republic of)": "North Korea",
    "Palestine, State of": "Palestine",
    "Taiwan (Province of China)": "Taiwan",
    "Venezuela (Bolivarian Republic of)": "Venezuela",
    "Lao PDR": "Laos",
    "Kyrgyz Republic": "Kyrgyzstan",
    "Slovak Republic": "Slovakia",
    "Türkiye": "Turkey",
    "Egypt, Arab Rep.": "Egypt",
    "Iran, Islamic Rep.": "Iran",
    "Gambia, The": "The Gambia",
}

# Rewrite the GDSP country names in place. Names that are not keys of the
# mapping are left unchanged.
gdsp_data['Country'] = gdsp_data['Country'].replace(country_name_mapping)

# List the GDSP names that still have no counterpart in the map's NAME column.
unmapped_countries = gdsp_data[~gdsp_data['Country'].isin(world['NAME'])]['Country'].unique()
print("Unmapped countries in GDSP data:")
print(unmapped_countries)

# Left-join the GDSP table onto the map shapes by country name, so every shape
# is kept even when no GDSP row matches.
world = world.merge(gdsp_data, left_on='NAME', right_on='Country', how='left')

# Show the join keys next to the growth indicator for the first few shapes.
print(world[['NAME', 'Country', 'Annualized growth in mean consumption or income per capita-Total Population']].head())

# Years used to label the maps.
years = [2016, 2017, 2018]

# One figure per year label.
# NOTE(review): every iteration colours the map by the same column; only the
# legend label and the title change with `year` -- confirm this is intended.
for year in years:
    fig, ax = plt.subplots(1, 1, figsize=(15, 10))
    world.boundary.plot(ax=ax)
    world.plot(
        column='Annualized growth in mean consumption or income per capita-Total Population',
        cmap='OrRd',
        legend=True,
        legend_kwds={'label': f'Annualized Growth in {year}',
                     'orientation': "horizontal"},
        ax=ax,
    )
    ax.set_title(f'World Map Showing Annualized Growth in Mean Consumption or Income per Capita in {year}')
    plt.show()

# Reload the population data from the raw CSV.
world_population_data = pd.read_csv('./世界各国人口结构数据 2008-2021.csv')

# Join the population rows with the GDSP indicators.
# gdsp_data['Country'] was rewritten through `country_name_mapping` earlier in
# this cell, so comparing it with the raw population names would silently drop
# every renamed country from the inner join. The same mapping is therefore
# applied to the population names, but only for a temporary join key:
# 'country_name' keeps its original spelling, so later filters on it still work.
population_join_key = world_population_data['country_name'].replace(country_name_mapping)
merged_data = pd.merge(
    world_population_data.assign(_join_name=population_join_key),
    gdsp_data,
    left_on='_join_name',
    right_on='Country',
    how='inner',
).drop(columns='_join_name')

# Heat map of the pairwise correlations between the numeric columns.
# include='number' selects the same columns as np.number, without relying on
# numpy having been imported by an earlier cell.
plt.figure(figsize=(12, 10))
correlation_analysis = merged_data.select_dtypes(include='number').corr()
sns.heatmap(correlation_analysis, annot=False, cmap='coolwarm')
plt.title('Heatmap of Correlation between Economic and Population Indicators')
plt.show()

# Scatter plot: growth indicator against total population.
population_scatter_fig, population_scatter_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=merged_data,
    x='population_total',
    y='Annualized growth in mean consumption or income per capita-Total Population',
    ax=population_scatter_ax,
)
population_scatter_ax.set_title('Annualized Growth in Mean Consumption or Income per Capita vs. Total Population')
population_scatter_ax.set_xlabel('Total Population')
population_scatter_ax.set_ylabel('Annualized Growth in Mean Consumption or Income per Capita')
plt.show()

# Line chart: growth indicator and old-age dependency ratio per year for each
# selected country, all on one shared axis.
country_trend_fig, country_trend_ax = plt.subplots(figsize=(12, 8))
countries = ['China', 'United States', 'India']
for country in countries:
    # Rows of the merged data that belong to this country.
    country_data = merged_data.loc[merged_data['country_name'] == country]
    country_trend_ax.plot(
        country_data['year'],
        country_data['Annualized growth in mean consumption or income per capita-Total Population'],
        label=f'{country} - Economic Growth',
    )
    country_trend_ax.plot(
        country_data['year'],
        country_data['age_dependency_ratio_old'],
        label=f'{country} - Old Age Dependency Ratio',
    )
country_trend_ax.set_title('Economic Growth and Old Age Dependency Ratio (2016-2018)')
country_trend_ax.set_xlabel('Year')
country_trend_ax.set_ylabel('Value')
country_trend_ax.legend()
plt.show()

# Bubble chart: growth indicator (x) against age dependency ratio (y). Marker
# area scales with total population, colour follows the country, no legend.
dependency_bubble_fig, dependency_bubble_ax = plt.subplots(figsize=(14, 8))
sns.scatterplot(
    data=merged_data,
    x='Annualized growth in mean consumption or income per capita-Total Population',
    y='age_dependency_ratio',
    size='population_total',
    hue='country_name',
    sizes=(20, 2000),
    alpha=0.5,
    palette='muted',
    legend=False,
    ax=dependency_bubble_ax,
)
dependency_bubble_ax.set_title('Bubble Chart of Economic Growth and Age Dependency Ratio')
dependency_bubble_ax.set_xlabel('Annualized Growth in Mean Consumption or Income per Capita')
dependency_bubble_ax.set_ylabel('Age Dependency Ratio')
plt.show()


In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns

# Reload the country shapefile and left-join the GDSP table (as left in
# `gdsp_data` by the preceding cells) onto it by country name.
world = gpd.read_file('./ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp')
world = world.merge(gdsp_data, left_on='NAME', right_on='Country', how='left')

# Years used to label the maps.
years = [2016, 2017, 2018]

# One figure per year label: outlines first, then the coloured shapes.
# NOTE(review): every iteration colours the map by the same column; only the
# legend label and the title change with `year` -- confirm this is intended.
for year in years:
    fig, ax = plt.subplots(1, 1, figsize=(15, 10))
    world.boundary.plot(ax=ax)
    year_legend = {'label': f'Annualized Growth in {year}',
                   'orientation': "horizontal"}
    world.plot(
        column='Annualized growth in mean consumption or income per capita-Total Population',
        ax=ax,
        cmap='OrRd',
        legend=True,
        legend_kwds=year_legend,
    )
    ax.set_title(f'World Map Showing Annualized Growth in Mean Consumption or Income per Capita in {year}')
    plt.show()

# Heat map of the correlation matrix (no per-cell annotations).
plt.figure(figsize=(12, 10))
repeat_heatmap_ax = sns.heatmap(correlation_analysis, cmap='coolwarm', annot=False)
repeat_heatmap_ax.set_title('Heatmap of Correlation between Economic and Population Indicators')
plt.show()

# Scatter plot: growth indicator against total population.
repeat_scatter_fig, repeat_scatter_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=merged_data,
    x='population_total',
    y='Annualized growth in mean consumption or income per capita-Total Population',
    ax=repeat_scatter_ax,
)
repeat_scatter_ax.set_title('Annualized Growth in Mean Consumption or Income per Capita vs. Total Population')
repeat_scatter_ax.set_xlabel('Total Population')
repeat_scatter_ax.set_ylabel('Annualized Growth in Mean Consumption or Income per Capita')
plt.show()

# Line chart: growth indicator and old-age dependency ratio per year for each
# selected country, all on one shared axis.
repeat_trend_fig, repeat_trend_ax = plt.subplots(figsize=(12, 8))
countries = ['China', 'United States', 'India']
for country in countries:
    # Rows of the merged data that belong to this country.
    country_data = merged_data.loc[merged_data['country_name'] == country]
    repeat_trend_ax.plot(
        country_data['year'],
        country_data['Annualized growth in mean consumption or income per capita-Total Population'],
        label=f'{country} - Economic Growth',
    )
    repeat_trend_ax.plot(
        country_data['year'],
        country_data['age_dependency_ratio_old'],
        label=f'{country} - Old Age Dependency Ratio',
    )
repeat_trend_ax.set_title('Economic Growth and Old Age Dependency Ratio (2016-2018)')
repeat_trend_ax.set_xlabel('Year')
repeat_trend_ax.set_ylabel('Value')
repeat_trend_ax.legend()
plt.show()

# Bubble chart: growth indicator (x) against age dependency ratio (y). Marker
# area scales with total population, colour follows the country, no legend.
repeat_bubble_fig, repeat_bubble_ax = plt.subplots(figsize=(14, 8))
sns.scatterplot(
    data=merged_data,
    x='Annualized growth in mean consumption or income per capita-Total Population',
    y='age_dependency_ratio',
    size='population_total',
    hue='country_name',
    sizes=(20, 2000),
    alpha=0.5,
    palette='muted',
    legend=False,
    ax=repeat_bubble_ax,
)
repeat_bubble_ax.set_title('Bubble Chart of Economic Growth and Age Dependency Ratio')
repeat_bubble_ax.set_xlabel('Annualized Growth in Mean Consumption or Income per Capita')
repeat_bubble_ax.set_ylabel('Age Dependency Ratio')
plt.show()


In [None]:
# 为了分析各大洲的经济增长率，我们需要合并GDSP数据和地理信息数据，并按大洲进行分组和分析。

import geopandas as gpd
import matplotlib.pyplot as plt

# Reload the country shapefile.
world = gpd.read_file('./ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp')

# Trim surrounding whitespace from the GDSP country names before joining.
gdsp_data['Country'] = gdsp_data['Country'].str.strip()

# Left-join the GDSP table onto the map shapes by country name.
world = world.merge(gdsp_data, left_on='NAME', right_on='Country', how='left')

# Lower-case alias of the shapefile's CONTINENT column, used as the group key.
world['continent'] = world['CONTINENT']

# Mean of the growth indicator per continent, returned as a two-column frame.
growth_by_continent = world.groupby('continent')[
    'Annualized growth in mean consumption or income per capita-Total Population'
].mean()
continent_growth = growth_by_continent.reset_index()

# Bar chart of the per-continent averages.
plt.figure(figsize=(12, 8))
continent_bar_ax = sns.barplot(
    data=continent_growth,
    x='continent',
    y='Annualized growth in mean consumption or income per capita-Total Population',
)
continent_bar_ax.set_title('Average Annualized Growth in Mean Consumption or Income per Capita by Continent')
continent_bar_ax.set_xlabel('Continent')
continent_bar_ax.set_ylabel('Average Annualized Growth in Mean Consumption or Income per Capita')
plt.show()

# Display the per-continent averages as the cell's output.
continent_growth


In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns

# Reload the country shapefile.
world = gpd.read_file('./ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp')

# Trim surrounding whitespace from the GDSP country names before joining.
gdsp_data['Country'] = gdsp_data['Country'].str.strip()

# Left-join the GDSP table onto the map shapes by country name.
world = world.merge(gdsp_data, left_on='NAME', right_on='Country', how='left')

# Countries to show in the bar chart.
selected_countries = ['China', 'United States', 'India', 'Brazil', 'Germany']

# An earlier cell rewrites gdsp_data['Country'] through a name mapping that
# turns 'United States' into 'United States of America'. Filtering on the short
# spelling alone would then silently drop the US from the chart, so both
# spellings of an aliased country are accepted.
country_aliases = {'United States': 'United States of America'}
accepted_names = set(selected_countries) | {
    country_aliases[name] for name in selected_countries if name in country_aliases
}

# Keep only the rows of the selected countries.
selected_countries_data = world[world['Country'].isin(accepted_names)]

# Bar chart of the growth indicator for the selected countries.
plt.figure(figsize=(12, 8))
sns.barplot(x='Country', y='Annualized growth in mean consumption or income per capita-Total Population', data=selected_countries_data)
plt.title('Annualized Growth in Mean Consumption or Income per Capita by Country')
plt.xlabel('Country')
plt.ylabel('Annualized Growth in Mean Consumption or Income per Capita')
plt.show()

# Print the growth figures behind the chart.
print(selected_countries_data[['Country', 'Annualized growth in mean consumption or income per capita-Total Population']])
