In [1]:
__author__ = 'shangxing'
#!/usr/bin/env python
#
#  Python program that will create the sample solution
#
#  @author: lak@climate.com
#  @version: $Id:$
#
#  you may have to install the master branch of 'dask' using:
#     pip install --upgrade git+https://github.com/blaze/dask
#
# Rate from Z and Zdr

# For the reprocessed rain rate, it was found that the Brandes, et al. (2002) formulation produced more consistent results for the tropical Maldives environment than the in-field, real-time formulation. See Brandes, et al., eqn. 7. Note that the form of the equation used for the Z/Zdr relation is very sensitive to the value of the coefficients, particularly for low rain rates.

# For the Z-ZH Rate used in the field, see the Footnotes.

#   ZDR is the linear ratio ZH / ZV (i.e. not expressed in dB).

#     RATE_Z_ZDR = zzdr_aa * (ZH ** zzdr_bb) * (ZDR ** zzdr_cc)

#   where

#     zzdr_aa = 0.00746
#     zzdr_bb = 0.945
#     zzdr_cc = -4.76


import time
import pandas as pd
import numpy as np
import sys
import pickle as df
import math

In [2]:

# Destination CSV for the estimates produced by this notebook.
outfile = "../data/new_solution_zdr_ref_11_12.csv"
# Location of the downloaded test file; change as necessary.
infile = "../data/test.csv"
#infile="kaggle/sample.csv"

In [3]:
# Read the whole input CSV into memory and report how long the read took.
t = time.time()
alldata = pd.read_csv(infile)

# Parenthesised single-argument print: same output on Python 2, and valid
# syntax on Python 3 (the bare print statement is not).
print('Time elapsed: {:.4f} secs'.format(time.time() - t))

Time elapsed: 22.2713 secs


In [4]:
# Start this cell's timer.
t = time.time()

# Coefficients (aa, bb, cc) of the Z/Zdr rain-rate relation shown below.
zzdr_aa, zzdr_bb, zzdr_cc = 0.00746, 0.945, -4.76

# RATE_Z_ZDR = zzdr_aa * (ZH ** zzdr_bb) * (ZDR ** zzdr_cc)

def rainRate_REF_ZDR(ref, zdr, minutes_past, aa=None, bb=None, cc=None,
                     zdr_in_db=False):
    """Estimate one hour's rainfall from reflectivity and differential reflectivity.

    Each observation is turned into a rain rate with

        rate = aa * (ZH ** bb) * (ZDR ** cc)

    where ZH = 10 ** (ref / 10) is the linear reflectivity and ZDR is the
    linear differential reflectivity.  Every rate is multiplied by the
    fraction of the hour its observation is valid for, and the products are
    summed.

    Validity windows: the first observation covers the time from the start
    of the hour up to its own timestamp, each later observation covers the
    gap since the previous one, and the last observation additionally
    covers the remainder of the hour up to minute 60.

    Parameters
    ----------
    ref : sequence of float
        Reflectivity in dBZ, one value per observation.
    zdr : sequence of float
        Differential reflectivity, same length as ``ref``.  Interpreted as
        a linear ratio unless ``zdr_in_db`` is true.
    minutes_past : sequence of numbers
        Minutes past the top of the hour for each observation, in
        ascending order.
    aa, bb, cc : float, optional
        Coefficients of the relation.  Each defaults to the module-level
        ``zzdr_aa`` / ``zzdr_bb`` / ``zzdr_cc`` when left as ``None``.
    zdr_in_db : bool, optional
        When true, ``zdr`` is converted from dB to a linear ratio with
        ``10 ** (zdr / 10)`` before use.  Defaults to ``False``, which keeps
        the previous interpretation of the input.

    Returns
    -------
    float
        Sum of rate * valid-hours over the usable observations; ``0.0``
        when there are no observations or none is usable.

    Notes
    -----
    Observations whose ``ref`` or ``zdr`` is not finite are skipped.
    Observations whose linear ZDR is zero or negative are skipped as well:
    raising such a value to a negative fractional power (cc = -4.76 by
    default) yields a division by zero, NaN or a complex number, which
    would otherwise corrupt or abort the whole hourly sum.

    NOTE(review): the caller in this file passes the 'Zdr' column through
    unchanged.  If that column is stored in dB, ``zdr_in_db=True`` is
    required for the formula to be meaningful - confirm against the data
    description.
    """
    # Resolve coefficients at call time so the module constants stay the default.
    aa = zzdr_aa if aa is None else aa
    bb = zzdr_bb if bb is None else bb
    cc = zzdr_cc if cc is None else cc

    minutes = np.asarray(minutes_past, dtype=float)
    if minutes.size == 0:
        return 0.0

    # How long is each observation valid?  Differences against the previous
    # timestamp (0 for the first one); the last observation also absorbs
    # whatever is left of the 60 minutes.
    valid_time = np.diff(np.concatenate(([0.0], minutes)))
    valid_time[-1] += 60.0 - minutes[-1]
    valid_hours = valid_time / 60.0

    ref_dbz = np.asarray(ref, dtype=float)
    zdr_lin = np.asarray(zdr, dtype=float)
    if zdr_in_db:
        zdr_lin = np.power(10.0, zdr_lin / 10.0)

    usable = np.isfinite(ref_dbz) & np.isfinite(zdr_lin) & (zdr_lin > 0)
    if not usable.any():
        return 0.0

    # dBZ -> linear Z.  See: https://en.wikipedia.org/wiki/DBZ_(meteorology)
    zh_lin = np.power(10.0, ref_dbz[usable] / 10.0)
    rates = aa * np.power(zh_lin, bb) * np.power(zdr_lin[usable], cc)

    # Sum up rain rate * valid time.
    return float(np.sum(rates * valid_hours[usable]))


# each unique Id is an hour of data at some gauge
def myfunc(hour):
    """Estimate the rainfall for one group of observations.

    Parameters
    ----------
    hour : pandas.DataFrame
        Rows of a single group; must provide the 'Ref', 'Zdr' and
        'minutes_past' columns.

    Returns
    -------
    The value returned by ``rainRate_REF_ZDR`` for the rows ordered by
    'minutes_past'.
    """
    # DataFrame.sort was removed from pandas; prefer sort_values and only
    # fall back to the old method on releases that predate it.
    if hasattr(hour, 'sort_values'):
        ordered = hour.sort_values('minutes_past', ascending=True)
    else:
        ordered = hour.sort('minutes_past', ascending=True)
    return rainRate_REF_ZDR(ordered['Ref'], ordered['Zdr'],
                            ordered['minutes_past'])


# Leftover R snippet for a Kdp-based rate; not used by this notebook.
#return(sum((sign(Kdp)*(40.6)*(abs(Kdp) **.866)*valid_time), na.rm=TRUE))
# This timer only spans the definitions in this cell, so the figure is tiny.
# Parenthesised print prints the same text on Python 2 and is valid on Python 3.
print('Time elapsed: {:.4f} secs'.format(time.time() - t))

Time elapsed: 0.0009 secs


In [5]:
# Compute one estimate per Id and write them to `outfile`.
t = time.time()
# Pass myfunc directly; wrapping it in a lambda adds nothing.
estimates = alldata.groupby(alldata['Id']).apply(myfunc)

# One-column frame named 'Expected', indexed by Id.
df = estimates.to_frame('Expected')

df.to_csv(outfile, header=True)
# Parenthesised print prints the same text on Python 2 and is valid on Python 3.
print('Time elapsed: {:.4f} secs'.format(time.time() - t))

Time elapsed: 929.7891 secs


