In [1]:
import pandas as pd
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.lines as mlines
import os
from IPython.display import display
import networkx as nx
import statistics
import scipy.stats as st

In [2]:
# City-level table; later cells read its 'pro_name', 'ARI', 'name' and 'en_name' columns.
df_rank = pd.read_csv('./study_data/score/城市信息+排名得分综合表.csv')
# Province lookup with English names, following the convention ZHEJIANG -> Zhejiang.
df_en = pd.read_csv('./study_data/省英文名称.csv')

# Provincial ranking

In [3]:
def show_protj(rank_df=None, en_df=None, out_path='./study_data/table/省级CII-rank.csv'):
    """Rank provinces by the mean ARI score of their cities.

    Parameters
    ----------
    rank_df : pandas.DataFrame, optional
        City-level table with at least the columns ``pro_name`` and ``ARI``.
        Defaults to the notebook-level ``df_rank``.
    en_df : pandas.DataFrame, optional
        Province lookup table containing a ``pro_name`` column; its other
        columns are carried into the result by the merge. Defaults to the
        notebook-level ``df_en``.
    out_path : str or None, optional
        CSV path the ranking is written to. Pass ``None`` to skip writing.

    Returns
    -------
    pandas.DataFrame
        One row per province found in both tables, with ``avg_score`` rounded
        to two decimals, sorted by ``avg_score`` descending (ties broken by
        ``pro_name``) and re-indexed from 0.
    """
    rank_df = df_rank if rank_df is None else rank_df
    en_df = df_en if en_df is None else en_df

    # groupby(sort=True) walks the provinces in a fixed order; the previous
    # set() iteration order could change from one kernel session to the next.
    records = []
    for pro_name, ari in rank_df.groupby('pro_name', sort=True)['ARI']:
        mean_score = round(np.mean(ari.to_numpy()), 2)
        records.append([pro_name, float('{:.2f}'.format(mean_score))])

    df = pd.DataFrame(records, columns=['pro_name', 'avg_score'])
    # Inner merge: provinces missing from en_df are dropped.
    df = pd.merge(df, en_df, on='pro_name')
    # Secondary key makes the order of tied scores reproducible.
    df = df.sort_values(by=['avg_score', 'pro_name'], ascending=[False, True])
    df = df.reset_index(drop=True)
    if out_path is not None:
        df.to_csv(out_path, index=False)
    return df

# Build the provincial ranking (show_protj also writes it to CSV) and display it.
PRO_RANK = show_protj()
PRO_RANK

Unnamed: 0,pro_name,avg_score,en_name,en_short
0,北京市,100.0,Beijing,BJ
1,上海市,80.68,Shanghai,SH
2,香港特别行政区,64.01,Hong Kong,HK
3,重庆市,61.38,Chongqing,CQ
4,天津市,59.42,Tianjin,TJ
5,澳门特别行政区,57.27,Macau,MO
6,台湾省,56.68,Taiwan,TW
7,浙江省,45.99,Zhejiang,ZJ
8,广东省,43.8,Guangdong,GD
9,江苏省,41.37,Jiangsu,JS


# Top 100 cities: distribution by province

In [4]:
# Keep the 100 cities with the highest ARI, then tally them per province.
rank_100 = (
    df_rank
    .sort_values(by='ARI', ascending=False)
    .reset_index(drop=True)
    .iloc[:100]
)
rank_100_data = Counter(rank_100['pro_name'])

In [5]:
# Province -> number of its cities among the top 100 by ARI.
rank_100_data

Counter({'广东省': 14,
         '浙江省': 10,
         '江苏省': 8,
         '四川省': 7,
         '云南省': 6,
         '安徽省': 5,
         '河南省': 4,
         '山东省': 4,
         '福建省': 4,
         '陕西省': 3,
         '河北省': 3,
         '内蒙古自治区': 3,
         '湖北省': 2,
         '辽宁省': 2,
         '甘肃省': 2,
         '江西省': 2,
         '广西壮族自治区': 2,
         '山西省': 2,
         '海南省': 2,
         '北京市': 1,
         '上海市': 1,
         '香港特别行政区': 1,
         '重庆市': 1,
         '天津市': 1,
         '澳门特别行政区': 1,
         '台湾省': 1,
         '湖南省': 1,
         '黑龙江省': 1,
         '吉林省': 1,
         '贵州省': 1,
         '新疆维吾尔自治区': 1,
         '宁夏回族自治区': 1,
         '青海省': 1,
         '西藏自治区': 1})

## Merge tables

In [6]:
# Turn the top-100 tally into a two-column frame (province, number of cities).
data = [[pro_name, n_cities] for pro_name, n_cities in rank_100_data.items()]
df_top100_count = pd.DataFrame(data, columns=['pro_name', 'count'])

# Left merge keeps every ranked province; one without a top-100 city gets
# count 0 instead of disappearing from the table.
df_pro = pd.merge(PRO_RANK, df_top100_count, on='pro_name', how='left')
df_pro['count'] = df_pro['count'].fillna(0).astype(int)

# sort_values returns a new frame, so the result has to be assigned; the
# index is then rebuilt so that `rank` really follows the sorted order.
df_pro = df_pro.sort_values(by='avg_score', ascending=False, kind='stable')
df_pro = df_pro.reset_index(drop=True)
df_pro['rank'] = df_pro.index + 1
df_pro

Unnamed: 0,pro_name,avg_score,en_name,en_short,count,rank
0,北京市,100.0,Beijing,BJ,1,1
1,上海市,80.68,Shanghai,SH,1,2
2,香港特别行政区,64.01,Hong Kong,HK,1,3
3,重庆市,61.38,Chongqing,CQ,1,4
4,天津市,59.42,Tianjin,TJ,1,5
5,澳门特别行政区,57.27,Macau,MO,1,6
6,台湾省,56.68,Taiwan,TW,1,7
7,浙江省,45.99,Zhejiang,ZJ,10,8
8,广东省,43.8,Guangdong,GD,14,9
9,江苏省,41.37,Jiangsu,JS,8,10


In [7]:
# Lay the provincial ranking out as two side-by-side halves (left: top half,
# right: bottom half). The split point is derived from the row count rather
# than hard-coded as 17/34, so the cell no longer fails with a length mismatch
# when df_pro has a different number of rows.
half = (len(df_pro) + 1) // 2
t_left = df_pro.iloc[:half]
t_right = df_pro.iloc[half:]

# Source column -> output header stem; '1' / '2' is appended per half.
# NOTE(review): 'conut' looks like a typo for 'count'. The header is kept
# unchanged because the CSV may be consumed elsewhere -- confirm before renaming.
column_map = {'rank': 'rank', 'en_name': 'en_name', 'avg_score': 'score', 'count': 'conut'}

halves = []
for suffix, part in (('1', t_left), ('2', t_right)):
    renamed = part[list(column_map)].rename(
        columns={src: stem + suffix for src, stem in column_map.items()}
    )
    halves.append(renamed.reset_index(drop=True))

# concat aligns the halves on their fresh 0-based index; with an odd number of
# rows the last right-hand row is left as NaN.
temp = pd.concat(halves, axis=1)
temp.to_csv('./study_data/table/34_province.csv', index=False)
temp

Unnamed: 0,rank1,en_name1,score1,conut1,rank2,en_name2,score2,conut2
0,1,Beijing,100.0,1,18,Yunnan,29.09,6
1,2,Shanghai,80.68,1,19,Hainan,27.25,2
2,3,Hong Kong,64.01,1,20,Hubei,25.81,2
3,4,Chongqing,61.38,1,21,Jiangxi,25.38,2
4,5,Tianjin,59.42,1,22,Gansu,24.85,2
5,6,Macau,57.27,1,23,Hunan,24.05,1
6,7,Taiwan,56.68,1,24,Guizhou,23.33,1
7,8,Zhejiang,45.99,10,25,Inner Mongolia,23.28,3
8,9,Guangdong,43.8,14,26,Shanxi,22.84,2
9,10,Jiangsu,41.37,8,27,Liaoning,22.38,2


# Within-province ranking analysis: top 5 cities

In [8]:
# Which cities are the top 5 (by ARI) inside each province?
exclude_list = ['北京市', '上海市', '重庆市',  '天津市', '香港特别行政区', '澳门特别行政区', '台湾省']
top5_data = []      # [province name, ['city(score)', ...]]
top5_data_en = []   # [province English name, [{city English name: 'score'}, ...]]

for province in df_en['pro_name']:
    in_province = df_rank[df_rank.pro_name == province]
    best_five = (
        in_province
        .sort_values(by='ARI', ascending=False)
        .head(5)
        .reset_index(drop=True)
    )

    labels = []
    labels_en = []
    for _, city in best_five.iterrows():
        ari_text = '{:.2f}'.format(round(city['ARI'], 2))
        labels.append(city['name'] + '(' + ari_text + ')')
        labels_en.append({city['en_name']: ari_text})

    # Provinces in exclude_list are processed above but not recorded.
    if province in exclude_list:
        continue
    top5_data.append([province, labels])
    province_en = df_en[df_en['pro_name'] == province]['en_name'].values[0]
    top5_data_en.append([province_en, labels_en])

for entry in top5_data:
    print(entry)

['浙江省', ['杭州市(64.35)', '宁波市(53.75)', '温州市(51.05)', '金华市(50.78)', '绍兴市(47.70)']]
['江苏省', ['南京市(57.50)', '苏州市(53.90)', '无锡市(50.27)', '扬州市(47.98)', '徐州市(47.90)']]
['广东省', ['广州市(66.33)', '深圳市(65.85)', '珠海市(53.35)', '中山市(51.87)', '佛山市(51.86)']]
['山东省', ['青岛市(53.58)', '济南市(52.00)', '淄博市(42.08)', '烟台市(39.35)', '潍坊市(33.50)']]
['福建省', ['厦门市(53.46)', '福州市(51.70)', '漳州市(39.42)', '泉州市(37.31)', '宁德市(31.09)']]
['陕西省', ['西安市(56.54)', '汉中市(42.12)', '榆林市(37.87)', '延安市(37.11)', '咸阳市(32.16)']]
['河北省', ['石家庄市(51.26)', '保定市(43.85)', '廊坊市(38.64)', '张家口市(36.62)', '唐山市(34.32)']]
['四川省', ['成都市(63.30)', '宜宾市(49.47)', '绵阳市(45.31)', '乐山市(42.49)', '泸州市(42.26)']]
['安徽省', ['合肥市(53.16)', '黄山市(50.82)', '芜湖市(50.10)', '安庆市(39.02)', '阜阳市(37.88)']]
['河南省', ['郑州市(53.64)', '洛阳市(50.33)', '南阳市(48.88)', '安阳市(48.74)', '开封市(36.72)']]
['湖南省', ['长沙市(53.82)', '湘西土家族苗族自治州(32.55)', '株洲市(27.08)', '张家界市(25.55)', '衡阳市(24.61)']]
['湖北省', ['武汉市(58.20)', '襄阳市(48.55)', '宜昌市(32.55)', '恩施土家族苗族自治州(25.54)', '黄石市(25.41)']]
['海南省', ['海口市(50.40)', 

In [9]:
# Urban primacy table: one row per province in top5_data_en.
show_table = []
for pro, cities in top5_data_en:
    # `cities` is a list of one-item dicts {city English name: 'score'},
    # ordered from highest to lowest score.
    if not cities:
        # No ranked city for this province, so there is nothing to report.
        continue
    scores = [float(list(city.values())[0]) for city in cities]
    city_primacy = list(cities[0].keys())[0]

    # Two-city index: top score divided by the runner-up's score.
    # Undefined when the province has a single city.
    if len(scores) >= 2:
        UPI_2 = '{:.2f}'.format(round(scores[0] / scores[1], 2))
    else:
        UPI_2 = 'nan'

    # NOTE(review): despite its name, UPI_4 is the MEAN score of the top four
    # cities (fewer if the province has fewer), not a ratio -- confirm that
    # this is the intended definition.
    top_scores = scores[:4]
    total = 0  # named `total` so the builtin `sum` is not shadowed in the kernel
    for value in top_scores:
        total += value
    UPI_4 = '{:.2f}'.format(round(total / len(top_scores), 2))

    show_table.append([pro, city_primacy, UPI_2, UPI_4])

show_table_df = pd.DataFrame(show_table, columns=['Province', 'Primacy City', 'UPI_2', 'UPI_4'])
show_table_df.to_csv('./study_data/table/urban_primacy_index.csv', index=False)
# UPI_4 holds formatted strings; compare them as numbers so that e.g. '100.00'
# is not ordered below '59.35'.
show_table_df.sort_values(by='UPI_4', ascending=False, key=lambda col: col.astype(float))

Unnamed: 0,Province,Primacy City,UPI_2,UPI_4
2,Guangdong,Guangzhou,1.01,59.35
0,Zhejiang,Hangzhou,1.2,54.98
1,Jiangsu,Nanjing,1.07,52.41
9,Henan,Zhengzhou,1.07,50.4
7,Sichuan,Chengdu,1.28,50.14
8,Anhui,Hefei,1.05,48.27
3,Shandong,Qingdao,1.03,46.75
18,Yunnan,Kunming,1.21,45.82
4,Fujian,Xiamen,1.03,45.47
5,Shaanxi,Xi'an,1.34,43.41
