In [None]:
import pycountry
# Gather every pycountry record into a set and print it for a quick look.
ctr_to_country = set(pycountry.countries)
print(ctr_to_country)

In [None]:
import pandas as pd
import pycountry

# Load the raw inputs. sheet_name=None asks pandas for every sheet of the
# workbook, returned as a dict keyed by sheet name.
gdsp = pd.read_excel('GDSP.xlsx')
gdsp_median_income = pd.read_excel(
    'GDSP-and-Median-Income-historical-AM23.xlsx',
    sheet_name=None,
)
# personality_data = pd.read_csv('personality_data.csv')
personality_data = pd.read_csv('new_personality_data.csv')
population_data = pd.read_csv('世界各国人口结构数据 2008-2021.csv')

# Lookup from country name, alpha-2 code or alpha-3 code to the alpha-3 code.
# The outer loop runs name -> alpha_2 -> alpha_3, so if two passes ever yield
# the same key the later pass wins.
ctr_to_country = {
    getattr(country, code_attr): country.alpha_3
    for code_attr in ('name', 'alpha_2', 'alpha_3')
    for country in pycountry.countries
}

# Normalise a country column through a lookup table
def standardize_country_names(df, column_name, mapping=None):
    """Return a copy of ``df`` with ``column_name`` translated through a lookup.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the country column. It is not modified.
    column_name : str
        Name of the column to translate.
    mapping : dict, optional
        Lookup applied with ``Series.map``. When omitted, the module-level
        ``ctr_to_country`` dict is used.

    Returns
    -------
    pandas.DataFrame
        A new frame. Values that are not keys of the lookup become NaN,
        which is how ``Series.map`` treats missing dict keys.
    """
    lookup = ctr_to_country if mapping is None else mapping
    # Work on a copy so the caller's frame is left untouched.
    result = df.copy()
    result[column_name] = result[column_name].map(lookup)
    return result

# Translate each dataset's country column, then discard every row that has a
# missing value in any column.
gdsp = standardize_country_names(gdsp, 'Country').dropna()
personality_data = standardize_country_names(personality_data, 'country').dropna()
population_data = standardize_country_names(population_data, 'country_name').dropna()


In [None]:
# Show the first rows of gdsp.
gdsp.head()

In [None]:
# Show the first rows of personality_data.
personality_data.head()

In [None]:
# Show the first rows of population_data.
population_data.head()

In [None]:
# Outer-join the personality table with the GDSP table on their country
# columns, so rows from either side survive even without a partner.
economic_personality_data = personality_data.merge(
    gdsp,
    how='outer',
    left_on='country',
    right_on='Country',
)

# Write the joined table to disk without the index column.
economic_personality_data.to_excel('economic_personality_data.xlsx', index=False)

# import ace_tools as tools; 
# tools.display_dataframe_to_user(name="Economic and Personality Data", dataframe=economic_personality_data)
# tools.display_dataframe_to_user(name="Population and Personality Data", dataframe=population_personality_data)


In [None]:
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import seaborn as sns

# Plot style
sns.set(style="whitegrid")

# Look for a font file whose path mentions SimHei or SimSun so the Chinese
# titles below can be rendered. The comparison is case-insensitive, and
# zh_font stays None when no such file is installed.
zh_font_path = fm.findSystemFonts(fontpaths=None, fontext='ttf')
zh_font = None
for font_path in zh_font_path:
    lowered_path = font_path.lower()
    if 'simhei' in lowered_path or 'simsun' in lowered_path:
        zh_font = fm.FontProperties(fname=font_path)
        break

# The font is passed to every title; previously it was located but never used.
# When nothing was found, no font argument is passed at all.
zh_title_kwargs = {} if zh_font is None else {'fontproperties': zh_font}

growth_col = 'Annualized growth in mean consumption or income per capita-Total Population'
baseline_col = 'Baseline-Total Population$ a day (PPP)'

# Heatmap: correlations between the trait scores and the economic columns
plt.figure(figsize=(12, 10))
heatmap_data = economic_personality_data[['EXT', 'EST', 'AGR', 'CSN', 'OPN', growth_col, baseline_col]]
heatmap_data = heatmap_data.dropna()
correlation = heatmap_data.corr()
sns.heatmap(correlation, annot=True, cmap="YlGnBu")
plt.title('经济数据与人格特质的热力图', **zh_title_kwargs)
plt.show()

# Scatter plot: growth against EXT, coloured by country
plt.figure(figsize=(10, 8))
sns.scatterplot(data=economic_personality_data, x=growth_col, y='EXT', hue='Country')
plt.title('经济增长与外向性的关系散点图', **zh_title_kwargs)
plt.show()

# Line plot: EST over Period, one line per country
plt.figure(figsize=(10, 8))
sns.lineplot(data=economic_personality_data, x='Period', y='EST', hue='Country')
plt.title('不同年份的经济变化与情绪稳定性的关系折线图', **zh_title_kwargs)
plt.show()

# Bubble chart: growth against AGR, bubble size taken from the baseline column
plt.figure(figsize=(10, 8))
sns.scatterplot(data=economic_personality_data, x=growth_col, y='AGR', size=baseline_col, hue='Country', sizes=(20, 2000), legend=False)
plt.title('经济增长与宜人性的关系气泡图', **zh_title_kwargs)
plt.show()


### 热力图
- 增大标题和坐标轴标签的字体
- 将图例放置在图的右侧

In [None]:
# Plot style
sns.set(style="whitegrid")

# Latin typeface for all figure text
plt.rcParams["font.family"] = "Arial"

# Heatmap: pairwise correlations of the five trait scores and the two
# economic columns, computed on rows where all seven are present.
trait_and_economy_cols = [
    'EXT', 'EST', 'AGR', 'CSN', 'OPN',
    'Annualized growth in mean consumption or income per capita-Total Population',
    'Baseline-Total Population$ a day (PPP)',
]
plt.figure(figsize=(12, 10))
heatmap_data = economic_personality_data[trait_and_economy_cols].dropna()
correlation = heatmap_data.corr()
sns.heatmap(
    correlation,
    annot=True,
    cmap="YlGnBu",
    cbar_kws={'shrink': 0.8},
)
plt.title('Heatmap of Economic Data and Personality Traits', fontsize=16)
plt.savefig('./picture/111economic_personality_heatmap.png', bbox_inches='tight')
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Plot style
sns.set(style="whitegrid")

# Latin typeface for all figure text
plt.rcParams["font.family"] = "Arial"

# Source table
combined_data = economic_personality_data

# Keep the trait and economic columns, dropping rows with any missing value
trait_cols = ['EXT', 'EST', 'AGR', 'CSN', 'OPN']
economic_cols = [
    'Annualized growth in mean consumption or income per capita-Total Population',
    'Baseline-Total Population$ a day (PPP)',
]
heatmap_data = combined_data[trait_cols + economic_cols]
heatmap_data = heatmap_data.dropna()


def signed_log1p(values):
    """Return sign(v) * log1p(|v|) element-wise.

    Plain log1p is undefined for inputs at or below -1, which a growth
    rate can reach. This form is defined for every real input and equals
    log1p for non-negative values.
    """
    return np.sign(values) * np.log1p(np.abs(values))


# Compress the scale of the two economic columns, selected by name rather
# than by position.
transformed_data = heatmap_data.copy()
transformed_data[economic_cols] = transformed_data[economic_cols].apply(signed_log1p)

# Correlation matrix of the transformed table
correlation = transformed_data.corr()

# Draw and save the heatmap
plt.figure(figsize=(12, 10))
sns.heatmap(correlation, annot=True, cmap="YlGnBu", cbar_kws={'shrink': 0.8})
plt.title('Heatmap of Economic Data and Personality Traits (Log Transformed)', fontsize=16)
plt.savefig('./picture/new_economic_personality_heatmap_log_transformed.png', bbox_inches='tight')
plt.show()


In [None]:
# Plot style
sns.set(style="whitegrid")

# Latin typeface for all figure text
plt.rcParams["font.family"] = "Arial"

# Heatmap of the correlations between trait scores and economic columns.
# Rows missing any of the seven values are left out before correlating.
plt.figure(figsize=(12, 10))
heatmap_columns = ['EXT', 'EST', 'AGR', 'CSN', 'OPN']
heatmap_columns += [
    'Annualized growth in mean consumption or income per capita-Total Population',
    'Baseline-Total Population$ a day (PPP)',
]
heatmap_data = economic_personality_data[heatmap_columns].dropna()
correlation = heatmap_data.corr()
colorbar_options = {'shrink': 0.8}
sns.heatmap(correlation, annot=True, cmap="YlGnBu", cbar_kws=colorbar_options)
plt.title('Heatmap of Economic Data and Personality Traits', fontsize=16)
plt.savefig('./picture/economic_personality_heatmap.png', bbox_inches='tight')
plt.show()


### 散点图


In [None]:
# Scatter plot: growth against EXT, coloured by country, with the legend
# placed to the right of the axes in two columns.
plt.figure(figsize=(10, 8))
ax = sns.scatterplot(
    data=economic_personality_data,
    x='Annualized growth in mean consumption or income per capita-Total Population',
    y='EXT',
    hue='Country',
)
ax.set_title('Scatter Plot: Economic Growth vs. Extroversion', fontsize=16)
ax.set_xlabel('Annualized Growth in Mean Consumption or Income per Capita', fontsize=12)
ax.set_ylabel('Extroversion', fontsize=12)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=2)
plt.savefig('./picture/economic_ext_scatter.png', bbox_inches='tight')
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Representative countries: United Kingdom, United States, China, India, Germany
selected_countries = ['GBR', 'USA', 'CHN', 'IND', 'DEU']

# Keep only the rows of those countries
filtered_data = economic_personality_data[economic_personality_data['Country'].isin(selected_countries)]

# Plot style and one colour per selected country
sns.set(style="whitegrid")
palette = sns.color_palette("Set1", len(selected_countries))

growth_col = 'Annualized growth in mean consumption or income per capita-Total Population'

# Scatter layer
plt.figure(figsize=(12, 8))
sns.scatterplot(data=filtered_data,
                x=growth_col,
                y='EXT',
                hue='Country',
                palette=palette,
                alpha=0.7,
                s=100)

# Regression line over all selected countries. It is drawn BEFORE the axis
# labels are set: seaborn relabels both axes with the column names whenever
# it draws, so labels set earlier would be overwritten.
sns.regplot(data=filtered_data,
            x=growth_col,
            y='EXT',
            scatter=False,
            color='gray',
            line_kws={"lw":2})

# Title, axis labels, legend and grid go last so they are what gets saved
plt.title('Scatter Plot: Economic Growth vs. Extroversion', fontsize=16)
plt.xlabel('Annualized Growth in Mean Consumption or Income per Capita', fontsize=12)
plt.ylabel('Extroversion', fontsize=12)
plt.legend(title='Country', loc='center left', bbox_to_anchor=(1, 0.5), ncol=1)
plt.grid(True, linestyle='--', alpha=0.7)

# Save the figure
# NOTE(review): every other figure is written under ./picture/ — confirm
# that ./picture1/ is intended here.
plt.savefig('./picture1/economic_ext_scatter_selected.png', bbox_inches='tight')
plt.show()


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Representative countries: United Kingdom, United States, China, India, Germany
selected_countries = ['GBR', 'USA', 'CHN', 'IND', 'DEU']

# Restrict the merged table to those countries
in_selection = economic_personality_data['Country'].isin(selected_countries)
filtered_data = economic_personality_data[in_selection]

growth_col = 'Annualized growth in mean consumption or income per capita-Total Population'

# Per-country means of growth and EXT
desc_stats = filtered_data.groupby('Country')[[growth_col, 'EXT']].mean()
print("描述性统计：")
print(desc_stats)

# Correlation between growth and EXT across the selected rows
correlation = filtered_data[[growth_col, 'EXT']].corr()
print("\n相关系数：")
print(correlation)

# Scatter points in blue with a red regression line
sns.set(style="whitegrid")
plt.figure(figsize=(12, 8))
ax = sns.regplot(
    data=filtered_data,
    x=growth_col,
    y='EXT',
    scatter=True,
    color='blue',
    line_kws={"color":"red"},
)
ax.set_title('Scatter Plot with Regression Line: Economic Growth vs. Extroversion', fontsize=16)
ax.set_xlabel('Annualized Growth in Mean Consumption or Income per Capita', fontsize=12)
ax.set_ylabel('Extroversion', fontsize=12)
ax.grid(True, linestyle='--', alpha=0.7)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# Representative countries: United Kingdom, United States, China, India, Germany
selected_countries = ['GBR', 'USA', 'CHN', 'IND', 'DEU']

# Keep only those countries. The .copy() makes filtered_data an independent
# frame, so the column added below is not written to a slice of
# economic_personality_data.
filtered_data = economic_personality_data[
    economic_personality_data['Country'].isin(selected_countries)
].copy()

growth_col = 'Annualized growth in mean consumption or income per capita-Total Population'

# Tercile edges of the growth column. duplicates='drop' merges identical
# edges, so fewer than three bins can come back.
growth_bins = pd.qcut(
    filtered_data[growth_col],
    q=3,
    duplicates='drop',
    retbins=True
)

# One label per bin that actually exists
bin_count = len(growth_bins[1]) - 1
labels = ['Low', 'Medium', 'High'][:bin_count]

# Assign groups with the edges computed above. Re-running qcut with
# q=bin_count would recompute different quantiles and could fail again on
# duplicate edges.
filtered_data['growth_group'] = pd.cut(
    filtered_data[growth_col],
    bins=growth_bins[1],
    labels=labels,
    include_lowest=True
)

# Box plot of EXT per growth group
plt.figure(figsize=(12, 8))
sns.boxplot(data=filtered_data, x='growth_group', y='EXT', palette='Set3')
plt.title('Box Plot: Extroversion across Economic Growth Groups', fontsize=16)
plt.xlabel('Economic Growth Group', fontsize=12)
plt.ylabel('Extroversion', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Representative countries: United Kingdom, United States, China, India, Germany
selected_countries = ['GBR', 'USA', 'CHN', 'IND', 'DEU']

# Keep only those countries. The .copy() makes filtered_data an independent
# frame, so the column added below is not written to a slice of
# economic_personality_data.
filtered_data = economic_personality_data[
    economic_personality_data['Country'].isin(selected_countries)
].copy()

growth_col = 'Annualized growth in mean consumption or income per capita-Total Population'

# Tercile edges of the growth column. duplicates='drop' merges identical
# edges, so fewer than three bins can come back.
growth_bins = pd.qcut(
    filtered_data[growth_col],
    q=3,
    duplicates='drop',
    retbins=True
)

# One label per bin that actually exists
bin_count = len(growth_bins[1]) - 1
labels = ['Low', 'Medium', 'High'][:bin_count]

# Assign groups with the edges computed above. Re-running qcut with
# q=bin_count would recompute different quantiles and could fail again on
# duplicate edges.
filtered_data['growth_group'] = pd.cut(
    filtered_data[growth_col],
    bins=growth_bins[1],
    labels=labels,
    include_lowest=True
)

# Trait columns and the display name used for each
traits = ['EXT', 'EST', 'AGR', 'CSN', 'OPN']
titles = ['Extroversion (EXT)', 'Emotional Stability (EST)', 'Agreeableness (AGR)', 'Conscientiousness (CSN)', 'Openness (OPN)']

# Draw and save one box plot per trait
for trait, title in zip(traits, titles):
    plt.figure(figsize=(12, 8))
    sns.boxplot(data=filtered_data, x='growth_group', y=trait, palette='Set3')
    plt.title(f'Box Plot: {title} across Economic Growth Groups', fontsize=16)
    plt.xlabel('Economic Growth Group', fontsize=12)
    plt.ylabel(title, fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.7)
    output_path = f'./picture/{trait}_economic_growth_boxplot.png'
    plt.savefig(output_path, bbox_inches='tight')
    plt.show()


In [None]:
import statsmodels.api as sm

# Regress EXT on the growth column, with an intercept.
X = filtered_data['Annualized growth in mean consumption or income per capita-Total Population']
y = filtered_data['EXT']
X = sm.add_constant(X)  # adds the intercept column

# filtered_data derives from an outer merge, so either column may contain
# NaN. missing='drop' makes statsmodels discard those rows instead of
# fitting on missing values.
model = sm.OLS(y, X, missing='drop').fit()
print(model.summary())


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Histogram of the growth column with a KDE curve overlaid
growth_values = filtered_data['Annualized growth in mean consumption or income per capita-Total Population']
plt.figure(figsize=(10, 6))
ax = sns.histplot(growth_values, kde=True)
ax.set_title('Distribution of Annualized Growth in Mean Consumption or Income per Capita', fontsize=16)
ax.set_xlabel('Annualized Growth in Mean Consumption or Income per Capita', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)
ax.grid(True, linestyle='--', alpha=0.7)
plt.show()


In [None]:
# import numpy as np

# # 查看数据的分布情况后，选择自定义边界
# # 这里假设选择0、2和4作为边界，可以根据实际情况调整
# bins = [-np.inf, 0, 2, np.inf]
# labels = ['Low', 'Medium', 'High']

# # 使用自定义边界进行分组
# filtered_data['growth_group'] = pd.cut(filtered_data['Annualized growth in mean consumption or income per capita-Total Population'], bins=bins, labels=labels)

# # 绘制箱线图
# plt.figure(figsize=(12, 8))
# sns.boxplot(data=filtered_data, x='growth_group', y='EXT', palette='Set3')
# plt.title('Box Plot: Extroversion across Economic Growth Groups', fontsize=16)
# plt.xlabel('Economic Growth Group', fontsize=12)
# plt.ylabel('Extroversion', fontsize=12)
# plt.grid(True, linestyle='--', alpha=0.7)
# plt.show()


### 折线图


In [None]:
# Line plot: EST over Period with one line per country; the legend sits to
# the right of the axes in two columns.
plt.figure(figsize=(10, 8))
ax = sns.lineplot(
    data=economic_personality_data,
    x='Period',
    y='EST',
    hue='Country',
)
ax.set_title('Line Plot: Economic Changes vs. Emotional Stability Over Years', fontsize=16)
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Emotional Stability', fontsize=12)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=2)
plt.savefig('./picture/economic_est_line.png', bbox_inches='tight')
plt.show()

### 气泡图

In [None]:
# Bubble chart: growth against AGR, bubble size from the baseline column.
growth_col = 'Annualized growth in mean consumption or income per capita-Total Population'
size_col = 'Baseline-Total Population$ a day (PPP)'

plt.figure(figsize=(10, 8))
# The legend is left enabled here. With legend=False seaborn registers no
# handles, so the plt.legend call further down would draw an empty box.
ax = sns.scatterplot(data=economic_personality_data, x=growth_col, y='AGR', size=size_col, hue='Country', sizes=(20, 2000), alpha=0.6)
plt.title('Bubble Chart: Economic Growth vs. Agreeableness', fontsize=16)
plt.xlabel('Annualized Growth in Mean Consumption or Income per Capita', fontsize=12)
plt.ylabel('Agreeableness', fontsize=12)
handles, labels = ax.get_legend_handles_labels()
# Keep only the country entries so the legend is not filled with very large
# size markers. Assumes seaborn lists the hue entries first and starts the
# size group with an entry labelled by the size column — TODO confirm for
# the installed seaborn version; if the label is absent nothing is trimmed.
if size_col in labels:
    size_group_start = labels.index(size_col)
    handles, labels = handles[:size_group_start], labels[:size_group_start]
plt.legend(handles=handles, labels=labels, loc='center left', bbox_to_anchor=(1, 0.5), ncol=2)
plt.savefig('./picture/economic_agr_bubble.png', bbox_inches='tight')
plt.show()

In [None]:
# Bubble charts of growth against EXT, EST, CSN and OPN. The four figures
# share every setting except the trait column, its display name and the
# output file, so they are produced from one table.
bubble_specs = [
    ('EXT', 'Extroversion', './picture/bubbleplot2.png'),
    ('EST', 'Emotional Stability', './picture/bubbleplot3.png'),
    ('CSN', 'Conscientiousness', './picture/bubbleplot4.png'),
    ('OPN', 'Openness', './picture/bubbleplot5.png'),
]

for trait_col, trait_name, png_path in bubble_specs:
    plt.figure(figsize=(10, 8))
    sns.scatterplot(
        data=economic_personality_data,
        x='Annualized growth in mean consumption or income per capita-Total Population',
        y=trait_col,
        size='Baseline-Total Population$ a day (PPP)',
        hue='Country',
        sizes=(20, 2000),
        legend=False,
    )
    plt.title(f'Bubble Chart: Economic Growth vs. {trait_name}', fontsize=16)
    plt.xlabel('Annualized Growth in Mean Consumption or Income per Capita', fontsize=12)
    plt.ylabel(trait_name, fontsize=12)
    plt.savefig(png_path, bbox_inches='tight')
    plt.show()

print("All images have been saved locally.")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

# Reload the merged table from the Excel file, replacing the in-memory frame.
economic_personality_data = pd.read_excel('economic_personality_data.xlsx')

# Standardise the growth column and the five trait columns with
# StandardScaler, writing the scaled values back into the same columns.
scaler = StandardScaler()
scaled_columns = [
    'Annualized growth in mean consumption or income per capita-Total Population',
    'EXT', 'EST', 'CSN', 'OPN', 'AGR',
]
economic_personality_data[scaled_columns] = scaler.fit_transform(
    economic_personality_data[scaled_columns]
)

# Bubble-chart helper
def bubble_chart(x, y, title, xlabel, ylabel, filename, data=None):
    """Draw a bubble chart with a regression line, save it and show it.

    Bubble size comes from the 'Baseline-Total Population$ a day (PPP)'
    column and colour from the 'Country' column.

    Parameters
    ----------
    x, y : str
        Column names plotted on the horizontal and vertical axes.
    title, xlabel, ylabel : str
        Figure title and axis labels.
    filename : str
        Path the figure is written to. Its parent directory is created
        when it does not exist yet.
    data : pandas.DataFrame, optional
        Frame to plot. When omitted, the module-level
        ``economic_personality_data`` is used.
    """
    import os

    frame = economic_personality_data if data is None else data

    # savefig fails on a missing directory, so create it up front.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    plt.figure(figsize=(12, 10))
    sns.scatterplot(data=frame,
                    x=x,
                    y=y,
                    size='Baseline-Total Population$ a day (PPP)',
                    hue='Country',
                    sizes=(20, 2000),
                    alpha=0.6,
                    palette='viridis')
    # Regression line only; the points were drawn by scatterplot above.
    sns.regplot(data=frame,
                x=x,
                y=y,
                scatter=False,
                color='red',
                line_kws={"lw":2})
    # Labels are set after both seaborn calls so they are the ones kept.
    plt.title(title, fontsize=16)
    plt.xlabel(xlabel, fontsize=12)
    plt.ylabel(ylabel, fontsize=12)
    # Two-column legend anchored outside the upper-right corner of the axes
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), title='Country', ncol=2)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.savefig(filename, bbox_inches='tight')
    plt.show()

# Draw and save one bubble chart per trait. Only the trait column, its
# display name and the output file differ between the calls.
growth_column = 'Annualized growth in mean consumption or income per capita-Total Population'
growth_axis_label = 'Annualized Growth in Mean Consumption or Income per Capita'
chart_specs = [
    ('EXT', 'Extroversion', './picture/bubbleplot2.png'),
    ('EST', 'Emotional Stability', './picture/bubbleplot3.png'),
    ('CSN', 'Conscientiousness', './picture/bubbleplot4.png'),
    ('OPN', 'Openness', './picture/bubbleplot5.png'),
    ('AGR', 'Agreeableness', './picture/bubbleplot6.png'),
]

for trait_column, trait_name, png_path in chart_specs:
    bubble_chart(
        growth_column,
        trait_column,
        f'Bubble Chart: Economic Growth vs. {trait_name}',
        growth_axis_label,
        trait_name,
        png_path,
    )

print("All images have been saved locally.")


# 2

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pycountry
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


In [None]:
# Read the four inputs for the second analysis. Only the 'data_byspell'
# sheet of the historical workbook is loaded.
# personality_data = pd.read_csv('personality_data.csv')
personality_data = pd.read_csv('new_personality_data.csv')

gdsp_data = pd.read_excel('GDSP.xlsx')
gdsp_median_income = pd.read_excel(
    'GDSP-and-Median-Income-historical-AM23.xlsx',
    sheet_name='data_byspell',
)
population_data = pd.read_excel('population.xlsx')


## 数据预处理

In [None]:
# Lookup from ISO alpha-3 code to ISO alpha-2 code, built from pycountry
ctr_to_country = dict(
    (entry.alpha_3, entry.alpha_2) for entry in pycountry.countries
)

def convert_country_code(row):
    """Translate ``row`` through ``ctr_to_country``.

    Returns the mapped value when ``row`` is a key of the lookup and
    ``row`` itself otherwise.
    """
    return ctr_to_country.get(row, row)

# Add a converted code column to each table: (frame, source column, new column)
conversion_plan = (
    (personality_data, 'country', 'country_code'),
    (gdsp_data, 'Code', 'Country_Code'),
    (gdsp_median_income, 'code', 'Country_Code'),
    (population_data, 'country_code', 'Country_Code'),
)
for frame, source_column, target_column in conversion_plan:
    frame[target_column] = frame[source_column].apply(convert_country_code)


In [None]:
# Inner-join the four tables on the converted country code, one after the
# other: personality -> GDSP -> historical income -> population.
merged_data = (
    personality_data
    .merge(gdsp_data, left_on='country_code', right_on='Country_Code', how='inner')
    .merge(gdsp_median_income, on='Country_Code', how='inner')
    .merge(population_data, on='Country_Code', how='inner')
)


## 降维处理

In [None]:
# Columns fed to the dimensionality reduction, standardised with
# StandardScaler before use.
features = ['EXT', 'EST', 'AGR', 'CSN', 'OPN']
trait_matrix = merged_data[features].values
x = StandardScaler().fit_transform(trait_matrix)


In [None]:
# Project the standardised trait matrix onto two principal components and
# place the country code column next to the component scores.
pca = PCA(n_components=2)
principal_components = pca.fit_transform(x)
component_names = ['principal_component_1', 'principal_component_2']
principal_df = pd.DataFrame(data=principal_components, columns=component_names)
country_codes = merged_data[['Country_Code']]
final_df = pd.concat([principal_df, country_codes], axis=1)


## 可视化

In [None]:
# plt.figure(figsize=(80, 70))
# sns.heatmap(merged_data.corr(), annot=True, cmap='coolwarm')
# plt.title('Correlation Heatmap')
# plt.show()
# plt.savefig(./Correlation Heatmap.png)

In [None]:
# The heatmap shows the correlation coefficient between each pair of
# variables in the merged table. With the 'coolwarm' colormap, low values
# are blue and high values are red.
# Use a very large figure so every annotated cell stays readable.
plt.figure(figsize=(80, 70))

# Correlate numeric columns only. merged_data also holds string columns
# (country names and codes), and DataFrame.corr raises on those in
# pandas >= 2.0.
numeric_data = merged_data.select_dtypes(include='number')
heatmap = sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm')

# Title with a font size matched to the figure size
heatmap.set_title('Correlation Heatmap', fontsize=80)

# Tick label font sizes
plt.xticks(fontsize=40)
plt.yticks(fontsize=40)
plt.tight_layout()

plt.savefig("./picture/Correlation_Heatmap1.png")
# Show the plot
plt.show()


In [None]:
# Show the first rows of merged_data.
merged_data.head()

### 散点图

散点图展示了主成分分析 (PCA) 后的两个主要成分。这两个成分捕捉了最大程度的人格特质变化。每个点代表一个国家，通过颜色区分不同的国家。这个图表帮助我们理解不同国家的人格特质分布以及它们在主要成分空间中的位置。

In [None]:
# plt.figure(figsize=(20, 16))
# sns.scatterplot(data=final_df, x='principal_component_1', y='principal_component_2', hue='Country_Code')
# plt.title('PCA of Personality Traits')
# plt.show()


In [None]:
# Scatter of the two principal components, coloured by country code, with
# the legend to the right of the axes in two columns.
plt.figure(figsize=(20, 16))
ax = sns.scatterplot(
    data=final_df,
    x='principal_component_1',
    y='principal_component_2',
    hue='Country_Code',
)
ax.set_title('PCA of Personality Traits')
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=2)
plt.savefig('./picture/scatter_plot.png', bbox_inches='tight')
plt.show()


### 气泡图
气泡图展示了外向性 (EXT) 与底层 40% 人口人均收入 (meanb401) 之间的关系。气泡的大小表示每个国家的总人口。这个图表帮助我们理解人格特质与经济指标之间的关系，并可视化人口规模的影响。

In [None]:
# Bubble chart: meanb401 against EXT, bubble size from population_total,
# colour from the country code.
plt.figure(figsize=(40, 30))
ax = sns.scatterplot(
    data=merged_data,
    x='meanb401',
    y='EXT',
    size='population_total',
    hue='Country_Code',
    sizes=(20, 2000),
    alpha=0.5,
)
ax.set_title('Bubble Chart: EXT vs Mean Income', fontsize=24)
ax.set_xlabel('Mean Income per Capita (Bottom 40%)', fontsize=18)
ax.set_ylabel('Extroversion (EXT)', fontsize=18)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=2)
plt.savefig('./picture/bubble_chart.png', bbox_inches='tight')
plt.show()


In [None]:
#气泡图展示了外向性 (EXT) 与人均收入 (meanb401) 之间的关系。
#气泡的大小表示每个国家的总人口。
#这个图表帮助我们理解人格特质与经济指标之间的关系，并可视化人口规模的影响。
# plt.figure(figsize=(20, 16))
# sns.scatterplot(data=merged_data, x='meanb401', y='EXT', size='population_total', hue='Country_Code', sizes=(20, 2000), alpha=0.5)
# plt.title('Bubble Chart: EXT vs Mean Income')
# plt.show()


### 雷达图
雷达图展示了每个国家在五大人格特质（外向性EXT、情绪稳定性EST、宜人性AGR、尽责性CSN、开放性OPN）上的分数。每个轴代表一个人格特质，图中的线条和填充区域表示特定国家在这些特质上的表现。通过雷达图，可以直观地比较不同国家的人格特质分布模式。

In [None]:
import matplotlib.pyplot as plt
from math import pi

# Trait columns, one spoke each
categories = ['EXT', 'EST', 'AGR', 'CSN', 'OPN']
N = len(categories)

# Evenly spaced spoke angles; the first angle is repeated to close the polygon
angles = [n / float(N) * 2 * pi for n in range(N)]
angles += angles[:1]

# One polygon per country. merged_data can hold several rows per country
# code after the joins, so the trait scores are averaged per code instead of
# drawing (and listing in the legend) every row separately.
country_traits = merged_data.groupby('Country_Code')[categories].mean()

plt.figure(figsize=(20, 18))
ax = plt.subplot(111, polar=True)
for country_code, trait_means in country_traits.iterrows():
    values = trait_means.tolist()
    values += values[:1]
    ax.plot(angles, values, linewidth=1, linestyle='solid', label=country_code)
    ax.fill(angles, values, alpha=0.1)

# Label each spoke with its trait so the chart can be read on its own
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories)

plt.title('Radar Chart of Personality Traits')
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=2)
plt.savefig('./picture/radar_chart.png', bbox_inches='tight')
plt.show()
