In [3]:
import pandas as pd
import dateparser
import datetime
import os
import re
import geopandas as gpd
from shapely.geometry import Point


In [28]:
# File-name pattern for observation files such as "SYNOP17040103.dat".
# Raw strings are used so that \d, \. and \w reach the regex engine verbatim
# instead of being (invalid) Python string escapes.
#   group(1): the literal prefix "SYNOP"
#   group(2): the run of digits that follows it (the observation time stamp)
#   group(3): a dot followed by word characters (the file extension)
OBSTIME = r'(\d+)(\.\w+)'
P = re.compile(r"(SYNOP)%s" % (OBSTIME,))
# Polygon layer read once at cell execution; obs_from_file._compute_var joins
# station points against it. The relative path is resolved against the
# current working directory of the kernel.
GUANGDONG = gpd.read_file(os.path.abspath('./../read_binary/data/XZQ_D.shp'))
class obs_from_file(object):
    """Load one observation file, gap-fill it from its neighbours and average it.

    Construction runs three steps in order:

    1. ``_find_filegroup`` derives the names of the files ``incr`` hours
       before and after ``filename``.
    2. ``_read_data`` reads the three files and fills missing values of the
       central file from the earlier file first, then from the later one.
    3. ``_compute_var`` keeps the stations that spatially join to the
       module-level ``GUANGDONG`` layer and stores their column means.

    Parameters
    ----------
    filename : str
        Path of the central file; its base name must match the module-level
        pattern ``P`` (``SYNOP`` + digits + extension).
    nav : float, optional
        Value treated as missing when reading the files.
    sep : str, optional
        Column delimiter passed to ``pandas.read_csv``.
    incr : int, optional
        Offset, in hours, of the neighbouring files.

    Attributes
    ----------
    filename, filename_b, filename_a : str
        Central, earlier and later file paths.
    df : pandas.DataFrame
        Gap-filled observations indexed by ``station_id``.
    data_guangdong1 : geopandas.GeoDataFrame
        Result of the inner spatial join with ``GUANGDONG``.
    mean : pandas.Series
        Mean of every numeric column of ``data_guangdong1``.
    """

    def __init__(self,filename,nav=-888888.0,sep=' ',incr=1):
        self.filename  = filename
        self._find_filegroup(incr)
        self._read_data(nav,sep)
        self._compute_var()

    def _find_filegroup(self,incr):
        """Set ``filename_b`` / ``filename_a`` to the files ``incr`` hours away.

        Raises
        ------
        ValueError
            If the base name does not match ``P`` or carries fewer than the
            eight time-stamp digits (yymmddHH) that are needed.
        """
        directory, basename = os.path.split(self.filename)
        m = P.search(basename)
        if m is None:
            raise ValueError(
                'file name %r does not look like SYNOP<yymmddHH>.<ext>' % (basename,))
        flag, stamp, tail = m.group(1), m.group(2), m.group(3)
        if len(stamp) < 8:
            raise ValueError(
                'time stamp %r in %r is shorter than yymmddHH' % (stamp, basename))
        # The stamp has a fixed layout, so parse it with an explicit format
        # rather than a heuristic parser. As before, only the first eight
        # digits are used and the century is taken to be 20xx.
        otime = datetime.datetime.strptime('20' + stamp[:8], '%Y%m%d%H')
        step = datetime.timedelta(hours=incr)
        # os.path.join keeps a bare file name relative; plain concatenation
        # with os.sep would turn an empty directory into the filesystem root.
        self.filename_b = os.path.join(
            directory, flag + (otime - step).strftime('%y%m%d%H') + tail)
        self.filename_a = os.path.join(
            directory, flag + (otime + step).strftime('%y%m%d%H') + tail)

    def _read_data(self,nav,sep):
        """Read the three files and build the gap-filled frame ``self.df``.

        A neighbouring file that does not exist is replaced by a copy of the
        central data, so it contributes nothing new. Rows without a
        ``longitude`` in the central file are discarded. The ``rain_6`` and
        ``rain_24`` columns are dropped from the result.
        """
        def load(path):
            return pd.read_csv(path,sep=sep,na_values=nav,index_col='station_id')

        data = load(self.filename)
        data_b = load(self.filename_b) if os.path.isfile(self.filename_b) else data.copy()
        data_a = load(self.filename_a) if os.path.isfile(self.filename_a) else data.copy()

        # Side-by-side layout: <col>, <col>_b (earlier), <col>_a (later),
        # aligned on station_id.
        df = pd.concat([data, data_b.add_suffix('_b'), data_a.add_suffix('_a')], axis=1)
        # Explicit copy so the column assignments below act on an independent frame.
        df = df[df['longitude'].notna()].copy()

        need_to_drop = []
        for col in list(df.columns):
            if col.endswith('_a') or col.endswith('_b'):
                need_to_drop.append(col)
                continue
            # Whole-column assignment instead of chained indexing
            # (df[col][mask] = ...), which is not guaranteed to write through.
            # Priority is unchanged: own value, then earlier file, then later.
            df[col] = df[col].fillna(df[col+'_b']).fillna(df[col+'_a'])
        need_to_drop.extend(['rain_6', 'rain_24'])
        self.df = df.drop(need_to_drop,axis=1)

    def _compute_var(self):
        """Spatially join the stations with ``GUANGDONG`` and store the means."""
        geometry = [Point(xy) for xy in zip(self.df.longitude,self.df.latitude)]
        crs = {'init':'epsg:4326'}
        data = gpd.GeoDataFrame(self.df,crs=crs,geometry=geometry)
        # Label a private copy of the polygons with the stations' CRS so the
        # shared module-level layer is not modified on every instantiation.
        # NOTE(review): this only overrides the CRS label, it does not
        # reproject - confirm the shapefile coordinates are lon/lat degrees.
        region = GUANGDONG.copy()
        region.crs = data.crs
        data_guangdong = gpd.sjoin(data,region,how='inner')
        self.data_guangdong1 = data_guangdong
        # numeric_only=True skips the geometry and any text attributes, which
        # would otherwise make mean() raise on recent pandas versions.
        self.mean = data_guangdong.mean(numeric_only=True)

In [29]:
# Raw string: the backslashes in the Windows path are literal characters, not
# string escapes (\d and \S are invalid escape sequences in a normal literal).
# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
obs_sample = obs_from_file(os.path.abspath(r'D:\data_synop04\SYNOP17040103.dat'))

In [33]:
# Selects an interactive matplotlib backend and, as the output below reports,
# populates the interactive namespace from numpy and matplotlib.
# NOTE(review): the plotting cell imports matplotlib.pyplot explicitly, so a
# plain %matplotlib may be sufficient here - confirm nothing relies on the
# names %pylab injects.
%pylab

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [43]:
# Show the per-column means stored by obs_from_file._compute_var, i.e. the
# averages over the stations that joined to the GUANGDONG layer.
obs_sample.mean

latitude                23.204444
longitude              113.793889
hour                     3.000000
min                      0.000000
level                    1.000000
elevation               65.833333
pressure              1012.975000
psl                   1022.802778
u_wind                  -1.498333
v_wind                  -2.174889
temperatura            291.812778
dewp_temperatura       283.421111
humidity                62.305556
index_right             11.972222
DZZSTLSMJ           148006.194444
QLXS                     7.070162
2000                  9570.216567
dtype: float64

In [41]:
import matplotlib.pyplot as plt

# Draw the GUANGDONG polygons and overlay the joined stations as red dots.
_, ax = plt.subplots()
ax.set_aspect('equal')  # same scale on both axes so the outline is not stretched
GUANGDONG.plot(ax=ax)
obs_sample.data_guangdong1.plot(ax=ax, marker='o', color='red', markersize=5)
# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set_xlabel('Longitude (degrees)')
ax.set_ylabel('Latitude (degrees)')
ax.set_title('Observation stations joined to GUANGDONG')
plt.show()