# 生成背景场和观测数据

这个notebook用于生成背景场和观测的数据

背景场由再分析作为初始场，使用FourCastNet做逐小时预报到72小时得到。

观测由再分析作为真值，按5%、10%、50%、100%的比例随机采样，再叠加标准差为均值1.5%的高斯噪声得到。

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Depending on your combination of package versions, this can raise a lot of TF warnings... 
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import torch
# import seaborn as sns
import pickle
import sys
sys.path.append('../')
from src.utils.score import *
from src.utils.plot import plot_iter_result
from collections import OrderedDict

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Use the current CUDA device index when a GPU is available, otherwise the CPU.
device = torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'

In [4]:
# sns.set_style('darkgrid')
# sns.set_context('notebook')

In [5]:
# Root directory of the dataset files.
DATADIR = '/dataset/z500_5.625deg/geopotential_500_5.625deg'
# Name of the sub-directory under DATADIR that the files are read from.
mode = 'train'

## 读取预测数据集

从.nc文件中读取数据，为后续预测技巧的验证提供基础数据支撑

In [6]:
# Open every file of the selected sub-directory whose name contains "1979"
# as a single dataset, combined along its coordinates.
data = xr.open_mfdataset(f'{DATADIR}/{mode}/*1979*.nc', combine='by_coords')
data

Unnamed: 0,Array,Chunk
Bytes,68.44 MiB,68.44 MiB
Shape,"(8760, 32, 64)","(8760, 32, 64)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 68.44 MiB 68.44 MiB Shape (8760, 32, 64) (8760, 32, 64) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",64  32  8760,

Unnamed: 0,Array,Chunk
Bytes,68.44 MiB,68.44 MiB
Shape,"(8760, 32, 64)","(8760, 32, 64)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [7]:
from pathlib import Path
from src.data_factory.convert_z500 import convert_z500

# Project helper invoked before the scaler file is read; what it prepares is
# not visible in this notebook.
convert_z500()

# NOTE: pickle.load can execute arbitrary code contained in the file; only
# load a scaler file that comes from a trusted source.
with open(Path('/model/data') / 'scaler.pkl', 'rb') as f:
    item = pickle.load(f)

# Grid coordinates and the statistics used to standardise / de-standardise z.
lon = item['lon']
lat = item['lat']
mean = item['mean']
std = item['std']

mean, std

/model/data:存在


(54108.31062925485, 3352.3980519318557)

## 加载训练好的模型参数

In [8]:
from src.models.prednn_module import PredNNLitModule

# One pretrained network per forecast step (1, 3, 6, 12 and 24 hours), keyed
# by the step as a string. Each network is moved to `device` and switched to
# evaluation mode.
afnonets = {}
for lead in (1, 3, 6, 12, 24):
    module = PredNNLitModule.load_from_checkpoint(
        f'/dataset/pretrain_weghts/pretrain_lead{lead}.ckpt'
    )
    afnonets[str(lead)] = module.net.to(device).eval()

  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."


## 构建预报结果

使用AFNONet做预测，将72小时预测结果写入nc文件中。
预报步长为1h，预报长度为7天（共168小时），每隔3小时选一个初始场，每隔24小时存一次数据。

首先写出存下72小时预报结果的代码

In [9]:
# Forecast step, in hours.
dt = 1
# Number of states in one rollout: seven days of steps plus the initial state.
prediction_length = (7 * 24) // dt + 1
# Spacing, in samples, between consecutive initial conditions.
DECORRELATION_TIME = 3

In [10]:
# Number of time samples available at the forecast step size.
n_samples_all = len(data['z']) // dt
# Keep room for one full rollout after the last initial condition.
stop = n_samples = n_samples_all - prediction_length
# Indices of the initial conditions, DECORRELATION_TIME samples apart.
ics = np.arange(0, stop, DECORRELATION_TIME)
n_ics = ics.shape[0]
n_ics

2864

In [11]:
def autoregressive_inference(ic, valid_data_full, model, dt, prediction_length):
    """Roll ``model`` forward from one initial condition and keep daily states.

    The field ``valid_data_full['z'][ic]`` is standardised with the
    module-level ``mean`` and ``std``, fed to ``model``, and the model output
    is fed back in repeatedly. The states that fall on whole multiples of
    24 hours are converted back to physical units and returned.

    Args:
        ic: Index of the initial condition along the first axis of
            ``valid_data_full['z']``.
        valid_data_full: Dataset providing the variable ``'z'`` and the
            coordinates ``time``, ``lat`` and ``lon``.
        model: Network called as ``model(x)`` with a tensor of shape
            ``(1, 1, n_lat, n_lon)``; its output is written into one slot of
            the same grid size and passed back in as the next input.
        dt: Forecast step of ``model`` in hours. Must divide 24 evenly.
        prediction_length: Number of states in the rollout, including the
            initial state.

    Returns:
        An ``xarray.DataArray`` named ``'z'`` with dimensions
        ``('lead_time', 'time', 'lat', 'lon')``. ``lead_time`` holds the lead
        in hours (24, 48, ...) and ``time`` holds the single initial time.

    Raises:
        ValueError: If ``prediction_length`` is smaller than 1 or ``dt`` does
            not divide 24.
    """
    ic = int(ic)
    prediction_length = int(prediction_length)
    if prediction_length < 1:
        raise ValueError('prediction_length must be at least 1')

    # Number of model steps between two stored states (one state per 24 h).
    stride, remainder = divmod(24, dt)
    if remainder:
        raise ValueError('dt must divide 24 evenly')
    stride = int(stride)

    device = torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'
    model = model.to(device)

    # Standardise the initial field with the module-level statistics.
    init_data = valid_data_full['z'][ic].values
    init_data = (init_data - mean) / std
    init_data = torch.as_tensor(init_data).to(device, dtype=torch.float)

    # One slot per rollout state; the grid size is taken from the data itself.
    seq_pred = torch.zeros(
        (prediction_length, 1, *init_data.shape), dtype=torch.float, device=device
    )

    with torch.no_grad():
        seq_pred[0] = init_data
        state = torch.unsqueeze(seq_pred[0], dim=0)
        # Only prediction_length - 1 forward passes are needed: the state
        # produced after the last slot would never be stored.
        for step in range(1, prediction_length):
            state = model(state)
            seq_pred[step] = state

    # Keep every 24-hour state and undo the standardisation.
    stored = seq_pred[stride::stride].cpu().detach().numpy() * std + mean
    pred_nc = xr.DataArray(
        stored,
        dims=['lead_time', 'time', 'lat', 'lon'],
        coords={
            # Lead in hours of each stored state.
            'lead_time': np.arange(stride, prediction_length, stride) * dt,
            'time': valid_data_full.time.values[ic:ic + 1],
            'lat': valid_data_full.lat.values,
            'lon': valid_data_full.lon.values,
        },
        name='z',
    )
    return pred_nc

In [None]:
# Run the 1-hour model from every selected initial condition. A plain loop is
# kept so that an interrupted run still leaves the finished forecasts in `fcs`.
fcs = []
for ic in ics:
    fcs.append(autoregressive_inference(ic, data, afnonets['1'], dt, prediction_length))

In [None]:
# Merge the per-initial-condition forecasts into a single object and show it.
fc_iter = xr.merge(fcs)
fc_iter