In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### 输入数据

In [7]:
import random
import string

# 生成随机国家名称
def random_string(length):
    """Return a random string of ``length`` ASCII letters.

    Characters are drawn independently (with replacement) from the uppercase
    letters followed by the lowercase letters, using the global ``random``
    generator, so the result is reproducible once ``random.seed`` is set.
    """
    alphabet = string.ascii_uppercase + string.ascii_lowercase
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)

# Seed both generators so the synthetic data set is identical on every
# "Restart Kernel -> Run All"; without this each run produces different data.
SEED = 42
random.seed(SEED)     # drives the random country names
np.random.seed(SEED)  # drives the random factor values

num_countries = 10  # number of countries (adjustable)
countries = [random_string(random.randint(5, 10)) for _ in range(num_countries)]

# Generate 20 factor columns, each drawn uniformly from [0, 100)
factors = {f'Factor{i}': np.random.uniform(0, 100, size=num_countries) for i in range(1, 21)}

# Build the DataFrame: the country name first, followed by the factor columns
df = pd.DataFrame({
    'Country': countries,
    **factors
})

# Show the generated DataFrame
print(df)

      Country    Factor1    Factor2    Factor3    Factor4    Factor5  \
0      tYwCUu  73.249513  65.117470  19.989642  94.898850  69.713053   
1  OksbEuHNOQ  40.258069  73.087601  69.320369  52.095133   0.977937   
2     ARDKEsY  54.344906  78.640243   3.489421  53.618043  40.020547   
3      GaFrxn  69.090874  92.190457  62.923529  90.320363  61.655033   
4  sDghDtThWz  30.160184  34.979509  40.660138  19.997944  87.811872   
5       rFepR  60.251092  77.842792   1.005941  63.818460  18.306913   
6       gJEai   2.780251  81.181478  19.822891  98.474756  98.381839   
7     cUiunEy  96.124449  51.611837  67.695038   1.715029  54.573645   
8       JxNUA  25.089851  89.404852   4.978827  54.415200  17.299457   
9      LbIGdn  61.072139  53.475901   1.340460  46.722038  48.723466   

     Factor6    Factor7    Factor8    Factor9  ...   Factor11   Factor12  \
0  53.707506  70.696009  50.752211  69.109100  ...  14.623413  90.809495   
1  12.379237  20.441693  10.561523  42.210602  ...  35.

### 熵权法

In [8]:
# 输入dataframe, 第一列是country
def entropy(data):
    """Compute entropy-method indicator weights and composite scores.

    Parameters
    ----------
    data : pandas.DataFrame
        The first column holds the entity label (e.g. the country name) and
        every remaining column is a numeric indicator. Indicators are min-max
        scaled without inversion, so larger raw values yield larger scores.

    Returns
    -------
    weight_df : pandas.DataFrame
        Columns ``'Factor'`` (indicator name) and ``'Weight'``.
    score_df : pandas.DataFrame
        Columns ``'Country'`` (labels taken from the first input column) and
        ``'Score'`` (weighted sum of the scaled indicators).

    Raises
    ------
    ValueError
        If ``data`` has fewer than two rows: the entropy constant
        ``1 / ln(n)`` is undefined for ``n < 2``.
    """
    # Numeric part only (everything except the label column)
    values = data.iloc[:, 1:].to_numpy(dtype=float)
    n_rows = values.shape[0]
    if n_rows < 2:
        raise ValueError("entropy() needs at least two rows to compute weights")

    # Min-max scaling. A constant column has a zero range; dividing by it would
    # give NaN, so divide by 1 instead, which leaves that column at all zeros.
    min_vals = values.min(axis=0)
    spans = values.max(axis=0) - min_vals
    constant = spans == 0
    norm_values = (values - min_vals) / np.where(constant, 1.0, spans)

    # Share of each row within its indicator, p_ij. Only constant columns have
    # a zero column sum (any other column contains a scaled value of 1).
    col_sums = norm_values.sum(axis=0)
    p_ij = norm_values / np.where(constant, 1.0, col_sums)

    # Entropy E_j of each indicator; the small epsilon avoids log(0)
    k = 1 / np.log(n_rows)
    entropy_j = -k * np.nansum(p_ij * np.log(p_ij + 1e-10), axis=0)
    # A constant indicator cannot discriminate between rows: give it maximal
    # entropy so that its diversity, and therefore its weight, is zero.
    entropy_j[constant] = 1.0

    # Weights W_j from the degree of diversification 1 - E_j
    diversity = 1 - entropy_j
    total_diversity = diversity.sum()
    if total_diversity > 0:
        weights = diversity / total_diversity
    else:
        # Every indicator is constant (or there are none): fall back to equal
        # weights instead of dividing by zero.
        weights = np.full(diversity.shape, 1.0 / max(diversity.size, 1))

    # Composite score per row
    scores = np.dot(norm_values, weights)

    # Weight table
    weight_df = pd.DataFrame({
        'Factor': data.columns[1:],
        'Weight': weights
    })

    # Score table; the label column is taken by position so it does not have
    # to be literally named 'Country' in the input.
    score_df = pd.DataFrame({
        'Country': data.iloc[:, 0],
        'Score': scores
    })

    return weight_df, score_df

In [9]:
# Run the entropy weight method on the generated data; returns the weight table and the score table
weight, score = entropy(df)

In [10]:
weight

Unnamed: 0,Factor,Weight
0,Factor1,0.030645
1,Factor2,0.028675
2,Factor3,0.08874
3,Factor4,0.031374
4,Factor5,0.038726
5,Factor6,0.064323
6,Factor7,0.045604
7,Factor8,0.07015
8,Factor9,0.035631
9,Factor10,0.042817


In [11]:
score

Unnamed: 0,Country,Score
0,tYwCUu,0.507073
1,OksbEuHNOQ,0.438004
2,ARDKEsY,0.400318
3,GaFrxn,0.555238
4,sDghDtThWz,0.44533
5,rFepR,0.372815
6,gJEai,0.637005
7,cUiunEy,0.638908
8,JxNUA,0.317646
9,LbIGdn,0.507901


### PCA

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [17]:

def calculate_pca_weights(data):
    """Derive factor weights and row scores from the first principal component.

    Parameters
    ----------
    data : pandas.DataFrame
        The first column holds the entity label (e.g. the country name) and
        every remaining column is a numeric factor.

    Returns
    -------
    weight_df : pandas.DataFrame
        Columns ``'Factor'`` and ``'Weight'``; weights are the absolute
        first-component loadings normalized to sum to 1.
    score_df : pandas.DataFrame
        Columns ``'Country'`` and ``'Score'``; scores are the first-component
        projections rescaled to the range 0-100 (all zeros when every row has
        the same projection).
    """
    # Separate the label column from the factor columns
    countries = data.iloc[:, 0]
    factors = data.iloc[:, 1:]

    # Standardize the factors so that no factor dominates through its scale
    scaler = StandardScaler()
    standardized_data = scaler.fit_transform(factors)

    # Principal component analysis on the standardized factors
    pca = PCA()
    pca_scores = pca.fit_transform(standardized_data)

    # Weights: absolute loadings of the first component, normalized to sum to 1
    abs_loadings = np.abs(pca.components_[0])
    weights = abs_loadings / abs_loadings.sum()

    # Scores: projection of each row onto the first component.
    # NOTE(review): the weights drop the sign of the loadings while the scores
    # keep it, and the sign of a principal component is arbitrary -- confirm
    # that "higher score" has the intended orientation.
    country_scores = pca_scores[:, 0]

    # Rescale to 0-100; guard the degenerate case where all projections are
    # equal, which would otherwise divide by zero and yield NaN scores.
    min_score = country_scores.min()
    score_range = country_scores.max() - min_score
    if score_range > 0:
        normalized_scores = (country_scores - min_score) / score_range * 100
    else:
        normalized_scores = np.zeros_like(country_scores)

    # Weight table
    weight_df = pd.DataFrame({
        'Factor': factors.columns,
        'Weight': weights
    })

    # Score table
    score_df = pd.DataFrame({
        'Country': countries,
        'Score': normalized_scores
    })

    return weight_df, score_df


# Call the function to compute the PCA-based weights and scores
weights, scores = calculate_pca_weights(df)

In [18]:
weights

Unnamed: 0,Factor,Weight
0,Factor1,0.030916
1,Factor2,0.030849
2,Factor3,0.01114
3,Factor4,0.029635
4,Factor5,0.103016
5,Factor6,0.043361
6,Factor7,0.02562
7,Factor8,0.020617
8,Factor9,0.037592
9,Factor10,0.072261


In [19]:
scores

Unnamed: 0,Country,Score
0,tYwCUu,91.254904
1,OksbEuHNOQ,20.284893
2,ARDKEsY,89.048272
3,GaFrxn,55.266291
4,sDghDtThWz,92.680939
5,rFepR,0.0
6,gJEai,100.0
7,cUiunEy,41.621713
8,JxNUA,28.703256
9,LbIGdn,51.42478
