In [115]:
import numpy as np
import pandas as pd
import netCDF4 as nc
import geopandas as gpd
from netCDF4 import num2date
import datetime

from tqdm import tqdm

tqdm.pandas()
from shapely.geometry import Point

import matplotlib.pyplot as plt

import platform

# Override matplotlib's font family on macOS and Windows (presumably so the
# Chinese titles used in the figures render correctly); every other platform
# keeps the current setting untouched.
_FONT_BY_SYSTEM = {
    'Darwin': 'Arial Unicode MS',
    'Windows': 'SimHei',
}
_platform_font = _FONT_BY_SYSTEM.get(platform.system())
if _platform_font is not None:
    plt.rcParams["font.family"] = _platform_font

## 加载nc文件和读取数据


In [116]:
# Path of the NetCDF file analysed by this notebook, resolved relative to the
# working directory.
NC_FILE = "cru_ts4.05.1901.2020.tmp.dat.nc"
# The dataset handle stays open for the rest of the notebook; later cells read
# variables from it.
nc_data = nc.Dataset(NC_FILE)

In [117]:
# Dump the metadata of every variable in the file, with a row of asterisks
# printed before each one as a visual separator.
separator = "*" * 70
for variable in nc_data.variables.values():
    print(separator)
    print(variable)

**********************************************************************
<class 'netCDF4._netCDF4.Variable'>
float32 lon(lon)
    long_name: longitude
    units: degrees_east
unlimited dimensions: 
current shape = (720,)
filling on, default _FillValue of 9.969209968386869e+36 used
**********************************************************************
<class 'netCDF4._netCDF4.Variable'>
float32 lat(lat)
    long_name: latitude
    units: degrees_north
unlimited dimensions: 
current shape = (360,)
filling on, default _FillValue of 9.969209968386869e+36 used
**********************************************************************
<class 'netCDF4._netCDF4.Variable'>
float32 time(time)
    long_name: time
    units: days since 1900-1-1
    calendar: gregorian
unlimited dimensions: time
current shape = (1440,)
filling on, default _FillValue of 9.969209968386869e+36 used
**********************************************************************
<class 'netCDF4._netCDF4.Variable'>
float32 tmp(time, la

  print(temp_value)


## 提取变量


In [118]:
# Materialise the four variables used below as NumPy arrays, one per name,
# in the order lat, lon, time, tmp.
raw_lat_data, raw_lon_data, raw_time_data, raw_tmp_data = (
    np.array(nc_data.variables[variable_name])
    for variable_name in ('lat', 'lon', 'time', 'tmp')
)


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  raw_lat_data = np.array(nc_data.variables['lat'])
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  raw_lon_data = np.array(nc_data.variables['lon'])
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  raw_time_data = np.array(nc_data.variables['time'])
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  raw_tmp_data = np.array(nc_data.variables['tmp'])


In [119]:
# Show the dimensions of the temperature array. The three sizes match the
# lengths of the time, lat and lon variables printed earlier, so the axes are
# presumably ordered (time, lat, lon) — confirm against the file metadata.
raw_tmp_data.shape


(1440, 360, 720)

In [120]:
def cftime2datetime(cftime, units, format='%Y-%m-%d %H:%M:%S'):
    """
    Convert a numeric NetCDF time value into a ``datetime.datetime``.

    The value is decoded with ``num2date`` and then round-tripped through a
    string rendered and parsed with ``format``, so the result carries only the
    fields that ``format`` encodes.

    :param cftime: numeric time value, interpreted relative to ``units``
    :param units: units string handed to ``num2date``, e.g. ``'days since 1900-1-1'``
    :param format: ``strftime``/``strptime`` pattern used for the round trip
    :return: the resulting ``datetime.datetime``
    """
    decoded = num2date(times=cftime, units=units)
    rendered = decoded.strftime(format)
    return datetime.datetime.strptime(rendered, format)


# Decode every time stamp with the units recorded on the file's own `time`
# variable instead of a hard-coded copy of that string, so the conversion
# stays correct if a file with a different reference date is loaded.
time_units = nc_data.variables['time'].units
clean_time_data = [cftime2datetime(i, units=time_units) for i in raw_time_data]
# Peek at the first few converted values as a sanity check.
clean_time_data[:4]

[datetime.datetime(1901, 1, 16, 0, 0),
 datetime.datetime(1901, 2, 15, 0, 0),
 datetime.datetime(1901, 3, 16, 0, 0),
 datetime.datetime(1901, 4, 16, 0, 0)]

## 计算全球每年的平均气温

In [121]:
# Spatial mean of each time step, ignoring grid cells that hold the fill value.
# NOTE(review): this is a plain mean over grid cells with no latitude/area
# weighting — confirm that this is the intended definition of the global mean.
FILL_VALUE = 9.96921e+36
n_steps = raw_tmp_data.shape[0]
avg_mean_tmp = np.ones(shape=n_steps)

for index, step_grid in enumerate(tqdm(raw_tmp_data)):
    has_data = step_grid != FILL_VALUE
    avg_mean_tmp[index] = step_grid[has_data].mean()

100%|██████████| 1440/1440 [00:00<00:00, 2148.57it/s]


In [122]:
# One row per time step: its date, the spatial mean computed above, and the
# calendar year/month derived from the date for later grouping.
avg_mean_tmp_df = pd.DataFrame(
    {'date': clean_time_data, 'avg_mean_tmp': avg_mean_tmp}
).assign(
    year=lambda frame: frame['date'].dt.year,
    month=lambda frame: frame['date'].dt.month,
)
avg_mean_tmp_df


Unnamed: 0,date,avg_mean_tmp,year,month
0,1901-01-16,-2.276233,1901,1
1,1901-02-15,-0.606501,1901,2
2,1901-03-16,3.036036,1901,3
3,1901-04-16,7.728032,1901,4
4,1901-05-16,12.336249,1901,5
...,...,...,...,...
1435,2020-08-16,17.980717,2020,8
1436,2020-09-16,15.052033,2020,9
1437,2020-10-16,10.223289,2020,10
1438,2020-11-16,5.359230,2020,11


In [123]:
# Collapse the per-time-step means into one value per calendar year by
# averaging all rows that share the same year.
year_tmp_df = (
    avg_mean_tmp_df
    .groupby('year')['avg_mean_tmp']
    .mean()
    .rename('avg_tmp')
    .reset_index()
)
year_tmp_df.head()

Unnamed: 0,year,avg_tmp
0,1901,8.029477
1,1902,7.642335
2,1903,7.813257
3,1904,7.759539
4,1905,7.875768


In [128]:
% matplotlib

Using matplotlib backend: MacOSX


In [129]:
# Line chart of the yearly mean temperature.
fig, ax = plt.subplots(figsize=(12, 4), dpi=200)
ax.plot(year_tmp_df['year'], year_tmp_df['avg_tmp'], linestyle='-', marker='o')
ax.set_title("全球各年平均气温,公众号：pypi")
ax.set_xlabel("年份")
# Raw string: the label contains the mathtext command \circ, and "\c" is not a
# valid escape sequence in a normal string literal (it triggers a warning).
# The resulting text is unchanged.
ax.set_ylabel(r"温度平均数$ ^\circ C $")
plt.tight_layout()
# Write the file before showing the figure so the saved image does not depend
# on what the active backend does to the figure once the window is closed.
fig.savefig("全球各年平均气温.png")
plt.show()

In [93]:
# Load the boundary file as-is.
china_boundary = gpd.read_file(filename="中国地图边界202111版.json")

# Second frame with the same rows but with each geometry replaced by its
# zero-width buffer; the notebook treats this as the "valid" version of the
# boundary (see the comparison plot further down).
china_boundary_valid = china_boundary.assign(geometry=china_boundary.buffer(0))

In [88]:
# Outline of the raw boundary geometries on a single labelled axes.
fig, ax = plt.subplots()
china_boundary.boundary.plot(ax=ax)
ax.set(
    xlabel="longitude",
    ylabel="latitude",
    title="中国地图边界图",
)
plt.tight_layout()

### 对比一下两个地图

In [95]:
# Side-by-side outlines: the boundary as loaded (left) and the buffered
# version (right).
fig, ax = plt.subplots(ncols=2)
panels = (
    (china_boundary, "原始中国地图"),
    (china_boundary_valid, "处理有效后的中国地图"),
)
for panel_ax, (frame, panel_title) in zip(ax, panels):
    frame.boundary.plot(ax=panel_ax)
    panel_ax.set_title(panel_title)
plt.tight_layout()


In [101]:
def pic(lon, lat) -> bool:
    """
    Check whether a point lies inside the China boundary.

    The point is tested against every geometry row of
    ``china_boundary_valid`` and counts as inside when at least one row
    contains it. The previous version looked only at the row labelled ``0``
    and ignored any others; for a single-row frame the result is the same.

    :param lon: longitude of the point (east)
    :param lat: latitude of the point (north)
    :return: ``True`` if any boundary geometry contains the point, else ``False``
    """
    # bool(...) turns the NumPy boolean into a built-in bool, matching the
    # annotated return type.
    return bool(china_boundary_valid.contains(Point(lon, lat)).any())


# Spot check of the helper on one coordinate (the value is not displayed
# because it is not the last expression of the cell).
pic(lon=116, lat=45)

# Build the full longitude/latitude grid and flatten it to one row per point.
Lon_data, Lat_data = np.meshgrid(raw_lon_data, raw_lat_data)
point_set_df = pd.DataFrame({
    'longitude': Lon_data.flatten(),
    'latitude': Lat_data.flatten(),
})


def _row_in_china(row):
    """Evaluate ``pic`` at the longitude/latitude stored in ``row``."""
    return pic(lon=row['longitude'], lat=row['latitude'])


# Test every grid point against the boundary, row by row, with a tqdm
# progress bar. This is the slow step of the notebook.
point_set_df['in_china'] = point_set_df.progress_apply(_row_in_china, axis=1)
point_set_df.head()

100%|██████████| 259200/259200 [01:01<00:00, 4243.39it/s] 


Unnamed: 0,longitude,latitude,in_china
0,-179.75,-89.75,False
1,-179.25,-89.75,False
2,-178.75,-89.75,False
3,-178.25,-89.75,False
4,-177.75,-89.75,False


In [110]:
# Keep only the grid points flagged as inside the boundary.
inside_mask = point_set_df['in_china']
point_in_ch = point_set_df.loc[inside_mask]

# Draw those points in red on top of the boundary outline and save the figure.
fig, ax = plt.subplots(figsize=(10, 7), dpi=200)
china_boundary.boundary.plot(ax=ax)
ax.scatter(point_in_ch['longitude'], point_in_ch['latitude'], c='red', s=1)
ax.set(
    xlabel="longitude",
    ylabel="latitude",
    title="检测点是否在中国境内， 公众号：pypi",
)
plt.tight_layout()
fig.savefig("检测点是否在中国境内.png")

## 中国地图边界裁剪

In [133]:
# Bounding box (minx, miny, maxx, maxy) of the first geometry row; the final
# plot uses it to set the axis limits.
# NOTE(review): only row 0 is considered — if the frame holds several
# geometries, `total_bounds` may be what is wanted; confirm.
china_boundary_box = china_boundary_valid.bounds.iloc[0]

In [142]:
fig, ax = plt.subplots(figsize=(10, 7), dpi=150)
china_boundary.boundary.plot(ax=ax, color='black')

# Split the grid into points inside and outside the boundary and draw each
# group in its own colour (inside first, then outside).
inside_mask = point_set_df['in_china']
point_in_ch = point_set_df.loc[inside_mask]
point_notin_ch = point_set_df.loc[~inside_mask]
for subset, colour in ((point_in_ch, 'red'), (point_notin_ch, 'blue')):
    ax.scatter(subset['longitude'], subset['latitude'], c=colour, s=1)

ax.set(
    xlabel="longitude",
    ylabel="latitude",
    title="检测点是否在中国境内， 公众号：pypi",
)

# Crop the view to the boundary's bounding box plus a small margin on each side.
margin = 0.1
ax.set_xlim(china_boundary_box.minx - margin, china_boundary_box.maxx + margin)
ax.set_ylim(china_boundary_box.miny - margin, china_boundary_box.maxy + margin)
plt.tight_layout()
fig.savefig("检测点是否在中国境内2.png")
