forked from SophMC/notechain
-
Notifications
You must be signed in to change notification settings - Fork 0
/
030-group_tseries.py
99 lines (62 loc) · 2.96 KB
/
030-group_tseries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import pandas as pd
import numpy as np
import datetime as datetime
import matplotlib.pyplot as plt
# Creating a panel of timeseries for each group of stations.
# Panel will have a timeseries of 00,06,12,18 ws if that hour has at least 14
# obs per month.
# An average over the group will be an extra plot in the panel.
NAl=['60525Biskra','60549Mecheria','60550Elbayadh',
'60555Touggourt','60559ElOued','60566Ghardaia','60580Ouargla',
'60581HassiMessaoud']
CSar=['60607Timimoun','60611InAmenas','60620Adrar','60630InSalah',
'62103Ghadames','62124Sebha']
WSa=['61223Tombouctou','61226Gao','61230NioroDuSahel','61498Kiffa',
'61499AiounElAtrouss','61492Kaedi','61497Nema','61450Tidjika']
CSal=['61024Agadez','61045Goure','61052Niamey','64753Faya',
'61017Bilma']
Egy=['62387Minya','62393Asyut','62405Luxor','62414Asswan',
'62420Baharia','62423Farafra','62435Kharga']
Sud=['62600WadiHalfa','62640AbuHamed','62650Dongola','62660Karima',
'62680Atbara']
stations=[NAl,CSar,WSa,CSal,Egy,Sud]
group_names={'NAlgeria':NAl,'CSahara':CSar,'WSahel':WSa,'CSahel':CSal,
'Egypt':Egy,'Sudan':Sud}
def read_file(fname):
'''put the station name into read_file and read_file will return a
dataFrame called wind which has the following columns a dataframe with a
datetime index'''
column_names=["year","month","day","hour","ws"]
dtype={"year":int,"month":int,"day":int,"hour":int,"ws":float}
datafile='/home/sophie/projects/windspeed/data/%s_allwinds.txt' %fname
# specify the columns you want to group together. Can't include hour at
# this point as it is not in the right format.
date_spec = {'date_time': [0,1,2]}
# when you use keep_dat_col it keeps them as objects, not as the dtype you
# read them in as.
wind = pd.read_csv(datafile, sep=" ", names=column_names,
parse_dates=date_spec, keep_date_col=True, index_col=False )
# Dealing with hour - going from 600, 1200 etc to 6,12, 18
wind["hour"]=(wind["hour"]/100).astype(int)
# combining year, month, day that were parsed together into date_time with
# hour, which is now in the correct format.
wind['date_time'] = pd.to_datetime(wind.date_time) + \
wind.hour.astype('timedelta64[h]')
# make datetime the index before making subsections.
wind.index = wind['date_time']
# Adds extra rows where value is kept if it meets isin() criteria. Nan if
# it doesn't.
wind['ws_0']= wind['ws'][wind['hour'].isin([0])]
wind['ws_06']= wind['ws'][wind['hour'].isin([6])]
wind['ws_12']= wind['ws'][wind['hour'].isin([12])]
wind['ws_18']= wind['ws'][wind['hour'].isin([18])]
return wind
def group_mean(group):
'''loop over items in group list, using read_file on each one
return'''
pass
def plot_tseries():
'''set up n+1 subplots where n is number of stations in the group. Fill in
each plot with timeseries from each station and then a mean of all the
stations. Output to file eps.'''
pass