In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import pycountry

# Read the input tables from the working directory
population_data = pd.read_excel('population.xlsx')
personality_data = pd.read_csv('personality_data.csv')
gdsp_data = pd.read_excel('GDSP.xlsx')
gdsp_median_income = pd.read_excel(
    'GDSP-and-Median-Income-historical-AM23.xlsx',
    sheet_name='data_byspell',
)

# Read the world map geometries from the local Natural Earth shapefile
# world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world = gpd.read_file(
    './ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp'
)


In [None]:
# Convert country names to ISO alpha-3 codes
ctr_to_country = {entry.name: entry.alpha_3 for entry in pycountry.countries}


def convert_country_name_to_iso(name):
    """Return the alpha-3 code stored for ``name`` in ``ctr_to_country``.

    Returns None when ``name`` is not a key of the lookup table.
    """
    return ctr_to_country.get(name)


# Add an 'iso_a3' column to each table, derived from its country-name column
for frame, name_column in (
    (population_data, 'country_name'),
    (personality_data, 'country'),
    (gdsp_data, 'Country'),
):
    frame['iso_a3'] = frame[name_column].apply(convert_country_name_to_iso)
# gdsp_median_income['iso_a3'] = gdsp_median_income['countryname'].apply(convert_country_name_to_iso)


In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import pycountry

# Plot styling: use seaborn's "whitegrid" style for the figures below
sns.set(style="whitegrid")

# Step 1: load the data
population_data = pd.read_excel('population.xlsx')
# personality_data = pd.read_csv('personality_data.csv')
personality_data = pd.read_csv('new_personality_data.csv')
gdsp_data = pd.read_excel('GDSP.xlsx')
gdsp_median_income = pd.read_excel(
    'GDSP-and-Median-Income-historical-AM23.xlsx',
    sheet_name='data_byspell',
)

# Step 2: print the column names of every dataset
for label, frame in (
    ("Population Data Columns:", population_data),
    ("Personality Data Columns:", personality_data),
    ("GDSP Data Columns:", gdsp_data),
    ("GDSP Median Income Data Columns:", gdsp_median_income),
):
    print(label, frame.columns)

# Step 3: normalise country identifiers to ISO 3166-1 alpha-3 codes
ctr_to_country = {i.name: i.alpha_3 for i in pycountry.countries}

# Manual overrides for spellings that are not pycountry names
manual_country_mapping = {
    'Lao PDR': 'LAO'
}

# Every alpha-3 code pycountry knows, so values that already ARE codes can be
# passed through instead of being discarded by the name lookup.
_known_alpha_3 = set(ctr_to_country.values())


def convert_country_name_to_iso(name):
    """Return the ISO alpha-3 code for ``name``, or None if it cannot be resolved.

    Resolution order:
      1. exact pycountry country name (``ctr_to_country``),
      2. manual override (``manual_country_mapping``),
      3. ``name`` itself when it already is a known alpha-3 code.

    Non-string input (e.g. NaN from an empty cell) yields None. Surrounding
    whitespace is ignored.
    """
    if not isinstance(name, str):
        return None
    key = name.strip()
    if key in ctr_to_country:
        return ctr_to_country[key]
    if key in manual_country_mapping:
        return manual_country_mapping[key]
    if key in _known_alpha_3:
        return key
    return None


population_data['iso_a3'] = population_data['country_name'].apply(convert_country_name_to_iso)
personality_data['iso_a3'] = personality_data['country'].apply(convert_country_name_to_iso)
gdsp_data['iso_a3'] = gdsp_data['Country'].apply(convert_country_name_to_iso)
# NOTE(review): 'code' presumably already holds alpha-3 codes rather than
# country names - confirm against the sheet. The converter handles both: a
# pure name lookup would have mapped every code to None.
gdsp_median_income['iso_a3'] = gdsp_median_income['code'].apply(convert_country_name_to_iso)

# Report how many rows could not be resolved, then cast the key column to str.
# NOTE: astype(str) turns unresolved entries into the literal string 'None';
# filter those rows out before using iso_a3 as a merge key, otherwise the
# unresolved rows of different tables match each other.
for frame_label, frame in (
    ("Population Data", population_data),
    ("Personality Data", personality_data),
    ("GDSP Data", gdsp_data),
    ("GDSP Median Income Data", gdsp_median_income),
):
    unmapped_rows = int(frame['iso_a3'].isna().sum())
    if unmapped_rows:
        print(f"{frame_label}: {unmapped_rows} row(s) could not be mapped to an ISO code")
    frame['iso_a3'] = frame['iso_a3'].astype(str)

# Print the first rows of each table to confirm the iso_a3 column was created
for heading, frame in (
    ("Population Data Preview with iso_a3:", population_data),
    ("Personality Data Preview with iso_a3:", personality_data),
    ("GDSP Data Preview with iso_a3:", gdsp_data),
    ("GDSP Median Income Data Preview with iso_a3:", gdsp_median_income),
):
    print(heading)
    print(frame.head())

# Step 4: merge the datasets
world = gpd.read_file('./ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp')

# Attach the population columns to the map geometries, keyed on the shapefile's
# ADM0_A3 column (left join: every map row is kept).
# NOTE(review): if population_data holds several rows per country (e.g. one per
# year), this join repeats each geometry once per row - confirm and filter or
# aggregate first if that is the case.
merged_population = world.set_index('ADM0_A3').join(population_data.set_index('iso_a3'))


def _rows_with_iso(frame):
    """Return the rows of ``frame`` whose 'iso_a3' is a real code.

    The iso_a3 columns were cast with astype(str) above, which renders missing
    values as the strings 'None' / 'nan' ('<NA>' for pandas NA). Left in place,
    an inner merge pairs every unmapped row of one table with every unmapped
    row of the other, so they are removed here.
    """
    codes = frame['iso_a3']
    return frame[codes.notna() & ~codes.isin(['None', 'nan', '<NA>'])]


merged_personality = (
    _rows_with_iso(personality_data)
    .merge(_rows_with_iso(gdsp_data), on='iso_a3', how='inner')
    .merge(_rows_with_iso(gdsp_median_income), on='iso_a3', how='inner')
)

# Step 5: draw the world maps
#
# GeoDataFrame.plot() creates its own figure unless an Axes is passed in, so
# the figure is created explicitly and handed over via ax=. A bare
# plt.figure(figsize=...) beforehand only produces an extra empty figure and
# its size never reaches the map.

# World map: total population
fig, ax = plt.subplots(figsize=(15, 10))
merged_population.plot(column='population_total', cmap='OrRd', legend=True,
                       legend_kwds={'label': "Total Population by Country",
                                    'orientation': "horizontal"},
                       ax=ax)
ax.set_title('World Map of Total Population by Country', fontsize=16)
fig.savefig('./picture1/world_map_total_population.png', bbox_inches='tight')
plt.show()

# World map: population growth rate
fig, ax = plt.subplots(figsize=(15, 10))
merged_population.plot(column='population_growth', cmap='YlGn', legend=True,
                       legend_kwds={'label': "Population Growth Rate by Country",
                                    'orientation': "horizontal"},
                       ax=ax)
ax.set_title('World Map of Population Growth Rate by Country', fontsize=16)
fig.savefig('./picture1/world_map_population_growth.png', bbox_inches='tight')
plt.show()

# 步骤6：绘制其他图表

# # 人口金字塔图：显示不同年龄段人口
# # 生成age_group和gender示例数据
# # 假设数据中有'gender'和'age_group'列
# # 示例代码：绘制人口金字塔图
# # 示例数据需要根据实际数据调整
# population_data['age_group'] = pd.cut(population_data['year'], bins=[0, 14, 64, 100], labels=['0-14', '15-64', '65+'])
# population_data['gender'] = 'Unknown'  # 需要根据实际数据替换

# plt.figure(figsize=(12, 8))
# sns.barplot(data=population_data, x='age_group', y='population_total', hue='gender')
# plt.title('Population Pyramid by Age Group', fontsize=16)
# plt.xlabel('Age Group', fontsize=14)
# plt.ylabel('Total Population', fontsize=14)
# plt.legend(title='Gender')
# plt.savefig('./picture1/population_pyramid.png', bbox_inches='tight')
# plt.show()

# Scatter plot: crude birth rate against crude death rate, coloured by country
scatter_fig, scatter_ax = plt.subplots(figsize=(30, 24))
sns.scatterplot(
    data=population_data,
    x='birth_rate_crude',
    y='death_rate_crude',
    hue='country_name',
    ax=scatter_ax,
)
scatter_ax.set_title('Scatter Plot: Birth Rate vs Death Rate by Country', fontsize=16)
scatter_ax.set_xlabel('Crude Birth Rate', fontsize=14)
scatter_ax.set_ylabel('Crude Death Rate', fontsize=14)
# Legend goes outside the axes, to the right, in two columns
scatter_ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=2)
scatter_fig.savefig('./picture1/birth_death_scatter.png', bbox_inches='tight')
plt.show()

# # 步骤7：生成箱线图

# # 计算经济增长率的平均值并分组
# mean_growth = merged_personality['Annualized growth in mean consumption or income per capita-Total Population'].mean()
# merged_personality['gdp_group'] = pd.cut(merged_personality['Annualized growth in mean consumption or income per capita-Total Population'],
#                                          bins=[merged_personality['Annualized growth in mean consumption or income per capita-Total Population'].min(), 
#                                                mean_growth, 
#                                                merged_personality['Annualized growth in mean consumption or income per capita-Total Population'].max()],
#                                          labels=['Low', 'High'])

# # 人格特质列表
# traits = ['EXT', 'EST', 'AGR', 'CSN', 'OPN']
# titles = ['Extraversion (EXT)', 'Emotional Stability (EST)', 'Agreeableness (AGR)', 'Conscientiousness (CSN)', 'Openness (OPN)']

# # 逐个绘制箱线图
# for trait, title in zip(traits, titles):
#     plt.figure(figsize=(10, 6))
#     sns.boxplot(
#         data=merged_personality,
#         x='gdp_group',
#         y=trait,
#         palette='Set3'
#     )

#     plt.title(f'{title} Across Economic Groups', fontsize=16)
#     plt.xlabel('Economic Group (by Growth Rate)', fontsize=14)
#     plt.ylabel(f'{title} Score', fontsize=14)
#     plt.savefig(f'./picture1/{trait}_economic_groups_boxplot.png', bbox_inches='tight')
#     plt.show()

# print("All images have been saved locally.")


In [None]:
# Inspect the columns of the merged personality/economic table
print("Merged Personality Data Columns:", merged_personality.columns, sep="\n")

# Count the missing values in the annualized-growth column
growth_column = 'Annualized growth in mean consumption or income per capita-Total Population'
print("Check for NaN in {}:".format(growth_column))
print(merged_personality[growth_column].isnull().sum())


In [None]:
# Read the world map geometries from the local Natural Earth shapefile
# (same file an earlier cell already loads into `world`).
# world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world = gpd.read_file(
    './ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp'
)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the already-merged economic/personality table
combined_data_path = 'economic_personality_data.xlsx'
combined_data = pd.read_excel(combined_data_path)

# Show the first rows
print(combined_data.head())

# Show the number of missing values per column
print(combined_data.isna().sum())

# Column used to form the economic groups
growth_column = 'Annualized growth in mean consumption or income per capita-Bottom 40%'

# Remove rows that have no value in that column
combined_data = combined_data.dropna(subset=[growth_column])

# Split at the mean growth rate: [min, mean] -> 'Low', (mean, max] -> 'High'
growth_values = combined_data[growth_column]
group_edges = [growth_values.min(), growth_values.mean(), growth_values.max()]
combined_data['gdp_group'] = pd.cut(
    growth_values,
    bins=group_edges,
    labels=['Low', 'High'],
    include_lowest=True,
)

# Personality trait columns and the titles used for them in the figures
trait_titles = {
    'EXT': 'Extraversion (EXT)',
    'EST': 'Emotional Stability (EST)',
    'AGR': 'Agreeableness (AGR)',
    'CSN': 'Conscientiousness (CSN)',
    'OPN': 'Openness (OPN)',
}
traits = list(trait_titles.keys())
titles = list(trait_titles.values())

# One box plot per trait, grouped by gdp_group
for trait, title in trait_titles.items():
    box_fig, box_ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(data=combined_data, x='gdp_group', y=trait, palette='Set3', ax=box_ax)

    box_ax.set_title(f'{title} Across Economic Groups', fontsize=16)
    box_ax.set_xlabel('Economic Group (by Growth Rate)', fontsize=14)
    box_ax.set_ylabel(f'{title} Score', fontsize=14)
    # plt.savefig(f'./picture1/{trait}_economic_groups_boxplot.png', bbox_inches='tight')
    plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Uses the combined_data frame left in the kernel by the previous cell.
# e.g.: combined_data = pd.read_excel('path_to_combined_data.xlsx')

# Column used to form the economic groups
growth_column = 'Annualized growth in mean consumption or income per capita-Bottom 40%'

# Remove rows that have no value in that column
combined_data = combined_data.dropna(subset=[growth_column])

# Split the growth rate into three quantile groups
combined_data['gdp_group'] = pd.qcut(
    combined_data[growth_column],
    q=3,
    labels=['Low', 'Medium', 'High'],
)

# Personality trait columns and their display titles
traits = ['EXT', 'EST', 'AGR', 'CSN', 'OPN']
titles = [
    'Extraversion (EXT)',
    'Emotional Stability (EST)',
    'Agreeableness (AGR)',
    'Conscientiousness (CSN)',
    'Openness (OPN)',
]

# Colour palette for the boxes
palette = sns.color_palette("Set2")

# One figure per trait: box plot with the individual points overlaid.
# A swarmplot shows the data points without packing them as densely as a
# stripplot would.
for trait, title in zip(traits, titles):
    box_fig, box_ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(data=combined_data, x='gdp_group', y=trait, palette=palette, ax=box_ax)
    sns.swarmplot(
        data=combined_data,
        x='gdp_group',
        y=trait,
        color='black',
        alpha=0.6,
        size=3,
        ax=box_ax,
    )

    box_ax.set_title(f'{title} Across Economic Groups', fontsize=16)
    box_ax.set_xlabel('Economic Group (by Growth Rate)', fontsize=14)
    box_ax.set_ylabel(f'{title} Score', fontsize=14)
    box_ax.grid(True, linestyle='--', alpha=0.7)
    box_fig.savefig(f'./picture/{trait}_economic_groups_boxplot.png', bbox_inches='tight')
    plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Uses the combined_data frame left in the kernel by the previous cells.
# NOTE(review): when the cells run in order, that frame has already had the
# rows without a 'Bottom 40%' growth value removed - confirm this is intended
# for the 'Total Population' analysis below.
# e.g.: combined_data = pd.read_excel('path_to_combined_data.xlsx')

# Column used to form the economic groups; rows without a value are removed
economic_factor = 'Annualized growth in mean consumption or income per capita-Total Population'
combined_data = combined_data.dropna(subset=[economic_factor])

# Split the growth rate into three quantile groups
combined_data['gdp_group'] = pd.qcut(
    combined_data[economic_factor],
    q=3,
    labels=['Low', 'Medium', 'High'],
)

# Personality trait columns and their display titles
traits = ['EXT', 'EST', 'AGR', 'CSN', 'OPN']
titles = [
    'Extraversion (EXT)',
    'Emotional Stability (EST)',
    'Agreeableness (AGR)',
    'Conscientiousness (CSN)',
    'Openness (OPN)',
]

# Colour palette for the boxes
palette = sns.color_palette("Set2")

# One figure per trait: box plot with the individual points overlaid
for trait, title in zip(traits, titles):
    box_fig, box_ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(data=combined_data, x='gdp_group', y=trait, palette=palette, ax=box_ax)
    sns.swarmplot(
        data=combined_data,
        x='gdp_group',
        y=trait,
        color='black',
        alpha=0.6,
        size=3,
        ax=box_ax,
    )

    box_ax.set_title(f'{title} Across Economic Groups ({economic_factor})', fontsize=16)
    box_ax.set_xlabel('Economic Group (by Growth Rate)', fontsize=14)
    box_ax.set_ylabel(f'{title} Score', fontsize=14)
    box_ax.grid(True, linestyle='--', alpha=0.7)
    box_fig.savefig(f'./picture1/{trait}_economic_groups_boxplot_{economic_factor}.png', bbox_inches='tight')
    plt.show()
