In [1]:
import pandas as pd
import numpy as np
import glob
import time
import datetime
import os
import json
# from tqdm import tqdm,trange

In [2]:
def gather_uvwind(abs_vwind,abs_uwind,v_pos_idx,u_pos_idx):
    """Combine u/v wind-component magnitudes into a total speed and an angle.

    Parameters
    ----------
    abs_vwind, abs_uwind : array-like of float
        Magnitudes of the v and u wind components. Signs are supplied
        separately through the two masks; any sign present here is dropped.
    v_pos_idx, u_pos_idx : array-like of bool
        True where the corresponding component is positive.

    Returns
    -------
    wind_speed
        Combined speed, ``sqrt(u**2 + v**2)``.
    angle_wind
        Angle in degrees of the (u, v) vector measured from the positive-u
        axis towards positive v, using the quadrant rules below.
        NOTE(review): this is the mathematical angle, not necessarily the
        meteorological "direction the wind blows from" -- confirm with callers.

    Both results are NumPy arrays, or pandas Series (sharing the input index)
    when either magnitude was passed as a Series.

    The value ranges are printed as a quick sanity check when the input is
    non-empty.
    """
    v_mag = np.abs(np.asarray(abs_vwind, dtype=float))
    u_mag = np.abs(np.asarray(abs_uwind, dtype=float))
    v_pos = np.asarray(v_pos_idx, dtype=bool)
    u_pos = np.asarray(u_pos_idx, dtype=bool)

    # hypot is the same quantity as cos(a)*u + sin(a)*v, without needing the angle.
    wind_speed = np.hypot(u_mag, v_mag)

    # First-quadrant angle. arctan2 avoids the v/u division, so u == 0 gives 90
    # degrees and calm wind (u == v == 0) gives 0 instead of NaN.
    base_angle = np.degrees(np.arctan2(v_mag, u_mag))

    # Move the first-quadrant angle into the quadrant selected by the signs:
    #   v+ u+ -> a        (quadrant 1)
    #   v+ u- -> 180 - a  (quadrant 2)
    #   v- u- -> 180 + a  (quadrant 3)
    #   v- u+ -> 360 - a  (quadrant 4)
    angle_wind = np.where(
        v_pos,
        np.where(u_pos, base_angle, 180 - base_angle),
        np.where(u_pos, 360 - base_angle, 180 + base_angle),
    )

    if wind_speed.size:
        print("风速范围:",np.min(wind_speed),"  ",np.max(wind_speed))
        print("风向范围:",np.min(angle_wind),"  ",np.max(angle_wind))

    # Hand back Series when the caller passed Series, so the result type
    # follows the input type.
    series_input = next(
        (arg for arg in (abs_vwind, abs_uwind) if isinstance(arg, pd.Series)),
        None,
    )
    if series_input is not None:
        wind_speed = pd.Series(wind_speed, index=series_input.index)
        angle_wind = pd.Series(angle_wind, index=series_input.index)
    return wind_speed,angle_wind

In [3]:
# Build one CSV per farm in the layout expected by the offline platform, and
# collect the global min/max of every exported feature along the way.
weather = "W001"
version = "v7"
result_dir = "./processed_data/{}_processed{}".format(weather,version)
# makedirs (not mkdir) so that a missing "./processed_data" parent is created too.
os.makedirs(result_dir, exist_ok=True)
weather_set = "anhui_wind_{}".format(weather)
result_format = "SJQXT0_xz{}all".format(weather)
# os.path.join keeps the pattern valid on non-Windows hosts; sorting makes the
# processing order (and therefore the final `result_csv`) deterministic.
file_list = sorted(glob.glob(os.path.join(".", "20210228_raw_anhui_data", weather_set, "*.csv")))
print(file_list)
# Features chosen by the feature-selection notebook; edit this list by hand
# when that selection changes.
usecols = ["DateTime","RHU_112","UWIND_001","AHU_120","TMP_112","TMP_106","TMP_103"]
# Temperature columns that are converted from Kelvin to Celsius.
kelvin_cols = ["TMP_112","TMP_106","TMP_103"]
min_max_dict = {}
for file in file_list:
    csv_data = pd.read_csv(file,index_col=None,header=0,usecols=usecols)
    csv_data[kelvin_cols] = csv_data[kelvin_cols]-273.15

    # Re-format the timestamp as day/month/year for the output file.
    timestamps = pd.to_datetime(csv_data["DateTime"],format='%Y-%m-%d %H:%M:%S')
    result_csv = pd.DataFrame({"datatime": timestamps.dt.strftime("%d/%m/%Y %H:%M:%S").values})

    # Copy the features in `usecols` order (read_csv keeps the file's own order).
    for feature in usecols:
        if feature=="DateTime":
            continue
        result_csv[feature] = csv_data[feature].values

    # Track each feature's min and max across all files; they are used later
    # for min-max normalisation. The maximum starts at -inf so that features
    # whose values are all very negative are still recorded correctly.
    for col in result_csv.columns:
        if col == "datatime":
            continue
        bounds = min_max_dict.setdefault(col, {"min":float("inf"),"max":float("-inf")})
        bounds["min"] = min(bounds["min"],float(result_csv[col].min()))
        bounds["max"] = max(bounds["max"],float(result_csv[col].max()))

    # The farm id is the file-name prefix before the first "_"; the character
    # at index 8 is replaced by "P" to form the output name.
    farm_J_name = os.path.basename(file).split("_")[0]
    farm_P_name = farm_J_name[:8]+"P"+farm_J_name[9:]
    print(farm_P_name)
    result_csv.to_csv(os.path.join(result_dir,"{}{}.csv".format(farm_P_name,result_format)),index=False)
print(min_max_dict)

['.\\20210228_raw_anhui_data\\anhui_wind_W001\\EEE09001J001_W001_R_20180103-20201107.csv', '.\\20210228_raw_anhui_data\\anhui_wind_W001\\EEE09001J002_W001_R_20180103-20201107.csv', '.\\20210228_raw_anhui_data\\anhui_wind_W001\\EEE09001J003_W001_R_20180103-20201107.csv', '.\\20210228_raw_anhui_data\\anhui_wind_W001\\EEE09001J004_W001_R_20180103-20201107.csv', '.\\20210228_raw_anhui_data\\anhui_wind_W001\\EEE09001J005_W001_R_20180103-20201107.csv', '.\\20210228_raw_anhui_data\\anhui_wind_W001\\EEE09001J006_W001_R_20180103-20201107.csv', '.\\20210228_raw_anhui_data\\anhui_wind_W001\\EEE09001J007_W001_R_20180103-20201107.csv', '.\\20210228_raw_anhui_data\\anhui_wind_W001\\EEE09001J008_W001_R_20180103-20201107.csv', '.\\20210228_raw_anhui_data\\anhui_wind_W001\\EEE09001J009_W001_R_20180103-20201107.csv', '.\\20210228_raw_anhui_data\\anhui_wind_W001\\EEE09001J010_W001_R_20180103-20201107.csv', '.\\20210228_raw_anhui_data\\anhui_wind_W001\\EEE09001J011_W001_R_20180103-20201107.csv', '.\\20210

In [4]:
# Write a JSON file describing the selected features together with their
# global max and min values.
save_dir = "./json/{}/{}.json".format(version,weather)
result_csv_columns = [name for name in result_csv.columns if name != "datatime"]
# The JSON entries must follow the CSV column order, so fail loudly if the
# collected statistics are keyed in a different order.
assert result_csv_columns==list(min_max_dict.keys()), "min_max_dict keys do not match the exported CSV columns"
result_dict = {}
# Disabled optional entries, kept for reference:
# result_dict["real_day"] = ["loop", 366, -1]
# result_dict["real_hour"] = ["loop", 96, -1]
for col in result_csv_columns:
    # float() turns NumPy scalars into plain Python floats so json can serialise them.
    max_num = float(min_max_dict[col]["max"])
    min_num = float(min_max_dict[col]["min"])
    # Entry layout: [type tag, max, min].
    result_dict["xz{}all_{}".format(weather,col)] = ["float",round(max_num,2),round(min_num,2)]

# Create "./json/<version>/" on first use instead of failing in open().
os.makedirs(os.path.dirname(save_dir), exist_ok=True)
b = json.dumps(result_dict)
# The context manager closes the file even if the write raises.
with open(save_dir, 'w', encoding="utf-8") as f2:
    f2.write(b)