# 利用numpy计算中国所有县之间的距离矩阵

In [None]:
import os
path = os.getcwd()
import matplotlib.pyplot as plt

In [None]:
import numpy as np

## 利用numpy的loadtxt功能导入数据，设置编码格式为utf-8以支持中文

In [None]:
# Load the county point table as an array of strings; skiprows=1 drops the
# first line of the file. Individual columns are converted to numbers later.
# The path is handed to loadtxt together with `encoding`, so NumPy opens and
# closes the file itself (the previous bare open() handle was never closed).
# The builtin `str` replaces the `np.str` alias, which newer NumPy releases
# no longer provide.
pnt = np.loadtxt("../data/XianCH_PNT2.csv",
                 encoding="utf8",
                 skiprows=1,
                 dtype=str,
                 delimiter=",")

In [None]:
# Bare expression: as the last statement of a notebook cell it displays the
# loaded string table for a quick visual check.
pnt

## 转换字符串为浮点型数据

In [None]:
# Parse columns 5 and 6 of the string table into float64 and keep each one as
# a 1 x N row vector, so that `X - X.T` broadcasts to an N x N matrix.
X = pnt[:, 5].astype(np.float64)[np.newaxis, :]
Y = pnt[:, 6].astype(np.float64)[np.newaxis, :]

## 计算距离矩阵

### numpy有很强的矩阵计算能力，适合超大型数据的快速计算

In [None]:
# Pairwise Euclidean distance matrix: subtracting the N x 1 transpose from the
# 1 x N row vector broadcasts to every (row, column) coordinate difference.
D = np.sqrt(np.square(X - X.T) + np.square(Y - Y.T))

In [None]:
# Bare expression: as the last statement of a notebook cell it displays the
# distance matrix.
D

## 获取中国最东到最西的两个县

In [None]:
# X holds a single row of coordinates, so the flat index returned by
# argmin/argmax is directly the row of that county in `pnt`. This replaces the
# float-equality scan `np.argwhere(X == X.min())`, which built a full boolean
# mask only to read its first hit. Ties still resolve to the first occurrence.
i = X.argmin()  # county with the smallest X
j = X.argmax()  # county with the largest X
print("i = {0},name = {1},j = {2},name = {3}".format(i, pnt[i][2],
                                                     j, pnt[j][2]))

## 获取中国最南到最北的两个县

In [None]:
# Y holds a single row of coordinates, so the flat index returned by
# argmin/argmax is directly the row of that county in `pnt`. This replaces the
# float-equality scan `np.argwhere(Y == Y.min())`, which built a full boolean
# mask only to read its first hit. Ties still resolve to the first occurrence.
i = Y.argmin()  # county with the smallest Y
j = Y.argmax()  # county with the largest Y
print("i = {0},name = {1},j = {2},name = {3}".format(i, pnt[i][2],
                                                     j, pnt[j][2]))

## 手动获取指定的县

In [None]:
# Row index of the first cell (in row-major order, any column) that equals
# each county name; raises IndexError when a name is not present in the table.
i, j = (np.nonzero(pnt == name)[0][0] for name in ("漠河县", "密云县"))
print("i = {0}, j = {1}".format(i, j))

## 显示距离

In [None]:
# Report the distance between counties i and j in decimal degrees and as an
# approximate distance in kilometres (105.0 is the rough km-per-degree factor
# used here). The message is kept as a single literal: the earlier
# backslash continuation inside the string leaked the next line's indentation
# into the printed text as trailing spaces.
print("距离1为 ： {0}(单位：十进制度)， 大致地理距离为 ： {1}(单位：公里)"
      .format(D[i][j], D[i][j] * 105.0))

## 绘制地图

In [None]:
# Coordinates of the two selected counties (rows i and j of the table).
X1 = pnt[[i, j], 5].astype(np.float64)
Y1 = pnt[[i, j], 6].astype(np.float64)

# One figure with fixed axis limits: all counties as a scatter layer, the two
# selected counties joined by a red line and drawn again on top.
fig = plt.figure(figsize=(9, 6))
ax = fig.gca()
ax.set_xlim(70, 139)
ax.set_ylim(15, 55)
ax.scatter(X, Y)
ax.plot(X1, Y1, "r")
ax.scatter(X1, Y1)

# 效率对比试验，用传统迭代的方式，来对比numpy的矩阵运算

In [None]:
import math

In [None]:
%%time
# Baseline for the timing comparison: build the same distance matrix with a
# plain nested Python loop, one math.sqrt call per pair. Deliberately not
# vectorised, since this cell exists to be measured against the NumPy version.
D2 = []
n_points = len(X[0])
for i in range(n_points):
    row = []
    for j in range(n_points):
        dx = X[0][i] - X[0][j]
        dy = Y[0][i] - Y[0][j]
        row.append(math.sqrt(dx ** 2 + dy ** 2))
    D2.append(row)

In [None]:
# NOTE(review): presumably a rough count of the distances computed (N**2 with
# N around 2048 counties), shown to put the timings in context; confirm against
# the actual number of rows in pnt.
2048 **2

In [None]:
%%time
# Vectorised counterpart of the nested-loop baseline: the same pairwise
# Euclidean distances computed in one broadcast NumPy expression.
D = np.sqrt(np.square(X - X.T) + np.square(Y - Y.T))

# 快速获取k邻近要素

In [None]:
# Row index of the query county (first cell in the table equal to its name).
i = np.argwhere(pnt == "香河县")[0][0]
# Rank every county by distance to row i with a single argsort, then print the
# rows ranked 1..9. Rank 0 is skipped because it is the smallest distance,
# normally the query county itself at distance 0.
# Sorting indices directly avoids re-sorting the row on every iteration and
# avoids mapping a distance value back to an index, which printed the same row
# twice whenever two counties were equally far away. The stable sort keeps
# equal distances in index order.
for nearest in np.argsort(D[i], kind="stable")[1:10]:
    print(pnt[nearest])