In [1]:
import numpy as np
import pandas as pd
import math

In [2]:
from matplotlib import pyplot as plt
%matplotlib inline

In [3]:
#' @noRd 
#' @name precintcon.ln.a
#' @author Lucas Venezian Povoa \email{lucasvenez@@gmail.com}
#' @aliases precintcon.ln.a 
#' @title ln(a)
#' @description Performs the calculation of ln(a) via the least-squares method.
#' @usage precintcon.ln.a(X, Y) 
#' @param X is the cumulative percentage of rainy days.
#' @param Y is the cumulative percentage of rainfall amounts.
#' @return ln(a) 
#' @seealso \code{\link{precintcon.ci.analysis}} 
#' @keywords precipitation concentration index 
def precintcon_ln_a(X, Y):
    """Return ln(a) computed with the closed-form least-squares expression.

    The expression is the intercept of the ordinary least-squares line
    fitted to ``ln(Y) - ln(X)`` as a function of ``X`` (i.e. the model
    ``Y = a * X * exp(b * X)``).

    Parameters
    ----------
    X : array-like
        Cumulative share of rainy days. Values are passed to ``np.log``,
        so they should be strictly positive.
    Y : array-like
        Cumulative share of rainfall amounts, same length as ``X``; also
        passed to ``np.log``.

    Returns
    -------
    float
        The fitted ``ln(a)``.

    Raises
    ------
    ValueError
        If ``X`` and ``Y`` have different lengths.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    if len(X) != len(Y):
        # Previously this case only printed a message and then failed with
        # an UnboundLocalError on the return statement.
        raise ValueError(
            "X and Y must have the same length (got %d and %d)" % (len(X), len(Y))
        )

    N = len(X)
    log_x = np.log(X)
    log_y = np.log(Y)
    sum_x = np.sum(X)
    sum_x2 = np.sum(X**2)

    numerator = (
        sum_x2 * np.sum(log_y)
        + sum_x * np.sum(X * log_x)
        - sum_x2 * np.sum(log_x)
        - sum_x * np.sum(X * log_y)
    )
    denominator = N * sum_x2 - sum_x**2
    return numerator / denominator


In [4]:
#' @noRd
#' @name precintcon.b 
#' @author Lucas Venezian Povoa \email{lucasvenez@@gmail.com}
#' @aliases precintcon.b 
#' @title b 
#' @description Calculates b via the least-squares method. 
#' @usage precintcon.b(X, Y) 
#' @param X is the cumulative percentage of rainy days.
#' @param Y is the cumulative percentage of rainfall amounts.
#' @return \code{b} 
#' @seealso \code{\link{ci}} 
#' @keywords precipitation concentration index
def precintcon_b(X, Y):
    """Return the least-squares coefficient b, divided by 100.

    Before the final division the expression is the slope of the ordinary
    least-squares line fitted to ``ln(Y) - ln(X)`` as a function of ``X``
    (i.e. the model ``Y = a * X * exp(b * X)``).

    Parameters
    ----------
    X : array-like
        Cumulative share of rainy days. Values are passed to ``np.log``,
        so they should be strictly positive.
    Y : array-like
        Cumulative share of rainfall amounts, same length as ``X``; also
        passed to ``np.log``.

    Returns
    -------
    float
        The fitted slope divided by 100.

    Raises
    ------
    ValueError
        If ``X`` and ``Y`` have different lengths.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    if len(X) != len(Y):
        # Previously this case only printed a message and then failed with
        # an UnboundLocalError on the return statement.
        raise ValueError(
            "X and Y must have the same length (got %d and %d)" % (len(X), len(Y))
        )

    N = len(X)
    log_x = np.log(X)
    log_y = np.log(Y)
    sum_x = np.sum(X)

    numerator = (
        N * np.sum(X * log_y)
        + sum_x * np.sum(log_x)
        - N * np.sum(X * log_x)
        - sum_x * np.sum(log_y)
    )
    denominator = N * np.sum(X**2) - sum_x**2
    # NOTE(review): the extra /100 presumably converts a slope fitted on
    # fractions (0-1) to the percent scale that ci_pre uses when it
    # evaluates exp(b * 100) -- confirm against the reference R code.
    return numerator / denominator / 100

In [5]:
# precintcon.fd.r
#the cumulative percentage of rainfall amounts.
def p_sum_p(o):
    """Cumulative fraction of the class amounts (midpoint * count).

    Parameters
    ----------
    o : numpy.ndarray, shape (k, 3)
        One row per class: column 0 is the lower limit, column 1 the upper
        limit and column 2 the number of observations in the class.

    Returns
    -------
    numpy.ndarray, shape (k,)
        Running sum of ``midpoint * count`` divided by the overall sum, so
        the last element is 1. An empty input gives an empty array.
    """
    o = np.asarray(o, dtype=float)
    if o.size == 0:
        return np.array([], dtype=float)

    # Class midpoint as used by the original code; the +0.1 compensates for
    # the upper limit stopping 0.1 short of the next class (e.g. 1.0..1.9).
    midpoints = (o[:, 0] + o[:, 1] + 0.1) / 2
    amounts = midpoints * o[:, 2]

    # A single cumsum replaces the repeated np.append / sum(x[0:k]) loops
    # and also works when there is only one class.
    cumulative = np.cumsum(amounts)
    return cumulative / cumulative[-1]


In [6]:
#p_sum_n(X)
#the cumulative percentage of rainy days.
'''
def p_sum_n(o):
    x = o[0,0]
    y = o[:,0]
    #print(len(y))
    for i in range(1,len(y)+1):
        #x = pd.concat(x, sum(o[0:i])) #np.r_
        #print(sum(o[0:i]))
        x = np.append(x, sum(o[0:i,0]))
    return(x/sum(y))
'''
def p_sum_n(o):
    """Cumulative fraction of the class counts.

    Parameters
    ----------
    o : array-like, shape (k,)
        Number of observations in each class.

    Returns
    -------
    numpy.ndarray, shape (k,)
        Running sum of ``o`` divided by its total, so the last element
        is 1. An empty input gives an empty array.
    """
    counts = np.asarray(o, dtype=float)
    if counts.size == 0:
        return np.array([], dtype=float)

    # cumsum also covers the single-class case, where the original loop
    # never ran and the result variable was left undefined.
    cumulative = np.cumsum(counts)
    return cumulative / cumulative[-1]

In [7]:
# precintcon.classification.r
def precintcon_class(v,interval):
    """Count the values of ``v`` that fall in consecutive classes.

    Classes start at 0 and advance by ``interval``. Each class covers the
    closed range ``[initial, final]`` with ``final = start + interval - 0.1``
    and ``initial = start`` (``0.1`` for the first class, so zeros are not
    counted). Because limits stop 0.1 short of the next class, values that
    lie strictly between two classes (e.g. 0.95 with ``interval=1``) are not
    counted; presumably the input has 0.1 resolution -- TODO confirm.

    Parameters
    ----------
    v : array-like
        Values to classify (a pandas Series or a numpy array).
    interval : int
        Class width. It is used as the ``range`` step, so it must be a
        positive integer.

    Returns
    -------
    numpy.ndarray, shape (k, 3)
        One row ``[initial, final, count]`` per non-empty class, in
        ascending order. ``k`` is 0 when no class is populated.
    """
    values = np.asarray(v, dtype=float)
    if values.size == 0:
        return np.empty((0, 3))

    rows = []
    for start in range(0, math.ceil(values.max() + interval), interval):
        initial = start + 0.1 if start == 0 else start
        final = start + interval - 0.1
        count = np.count_nonzero((values >= initial) & (values <= final))
        if count > 0:
            rows.append([initial, final, count])

    if not rows:
        # The original indexed a plain list here and raised a TypeError.
        return np.empty((0, 3))
    return np.array(rows, dtype=float)

In [8]:
def ci_pre(X):
    """Concentration index of a series of values.

    Steps:
      1. classify ``X`` into classes of width 1 (``precintcon_class``);
      2. build the cumulative shares of class amounts (``p_sum_p``) and of
         class counts (``p_sum_n``);
      3. fit ``a`` and ``b`` by least squares;
      4. evaluate ``A = (a/b) * exp(100*b) * (100 - 1/b)``,
         ``S = 5000 - A`` and return ``2*S/10000``.

    ``5000`` is half of ``100 * 100`` and ``10000`` is ``100 * 100``.

    Parameters
    ----------
    X : passed unchanged to ``precintcon_class`` (in this notebook a pandas
        Series such as ``df["RAIN"]``).

    Returns
    -------
    float
        The value ``2 * S / 10000``.
    """
    classes = precintcon_class(X, 1)
    amount_share = p_sum_p(classes)
    count_share = p_sum_n(classes[:, 2])

    coef_a = np.exp(precintcon_ln_a(count_share, amount_share))
    coef_b = precintcon_b(count_share, amount_share)

    area_under_curve = (coef_a / coef_b) * (
        np.exp(coef_b * 100) * (100 - (1 / coef_b))
    )
    area_between = 5000 - area_under_curve
    return 2 * area_between / 10000

In [9]:
# Load the whitespace-separated test file; -99 and -9999 mark missing values.
# The separator is a raw string: '\s' in a normal string literal is an
# invalid escape sequence and triggers a warning on recent Python versions.
df = pd.read_csv(
    'data_ci.csv',
    sep=r'\s+',
    skiprows=0,
    header=0,
    na_values=[-99, -9999],
)
# Drop every row that contains a missing value.
df = df.dropna()
df.head()

Unnamed: 0,DATE,RAIN
0,1976/1/1,3.4
1,1976/1/2,1.9
2,1976/1/3,0.0
3,1976/1/4,0.0
4,1976/1/5,35.9


In [10]:
# Concentration index of the RAIN column of the frame read from data_ci.csv.
ci=ci_pre(df["RAIN"])

In [11]:
ci

0.47638956965387996

 test R DATA 1976
 dataset         a          b        r2        A
       X2 0.1080223 0.02287477 0.9847644 2618.052
         S        ci
     2381.948 0.476389

In [12]:
# Read the comma-separated station file, treating -99, -9999 and 999990 as
# missing, and keep only the rows without any missing value.
df = pd.read_csv(
    'china_pre-before2000.csv',
    sep=',',
    skiprows=0,
    header=0,
    na_values=[-99, -9999, 999990],
).dropna()
df.head()

Unnamed: 0,Time,Stid,Lat,Lon,Height,rain,rain208,rain820
0,1951-01-01,50527,4913,11945,6766,0.0,0.0,0.0
1,1951-01-01,56571,2753,10218,15907,0.0,0.0,0.0
2,1951-01-01,58634,2840,11815,936,0.0,0.0,0.0
3,1951-01-01,56492,2849,10432,2750,0.0,0.0,0.0
4,1951-01-01,56386,2930,10345,3715,0.0,0.0,0.0


In [13]:
# Parse the Time column (YYYY-MM-DD strings) into datetime64 values.
times=pd.to_datetime(df['Time'],format='%Y-%m-%d')

In [14]:
times.head()

0   1951-01-01
1   1951-01-01
2   1951-01-01
3   1951-01-01
4   1951-01-01
Name: Time, dtype: datetime64[ns]

In [15]:
from datetime import datetime
# DATE holds the year of each observation as a 4-digit string.
# The vectorised .dt.strftime replaces a per-row Python loop over
# datetime.strftime and leaves unparseable/missing dates as missing values
# instead of raising.
df['DATE'] = pd.to_datetime(df['Time']).dt.strftime('%Y')

In [16]:
# Not the last expression of the cell, so this preview is not displayed.
df.head()
# Drop every row that contains a missing value.
df=df.dropna()

In [17]:
# Rows whose DATE (year string) equals '2001'. In the visible notebook bf is
# only referenced by the commented-out pivot_table call near the end.
bf=df[df["DATE"]=='2001']

In [18]:
df.head()

Unnamed: 0,Time,Stid,Lat,Lon,Height,rain,rain208,rain820,DATE
0,1951-01-01,50527,4913,11945,6766,0.0,0.0,0.0,1951
1,1951-01-01,56571,2753,10218,15907,0.0,0.0,0.0,1951
2,1951-01-01,58634,2840,11815,936,0.0,0.0,0.0,1951
3,1951-01-01,56492,2849,10432,2750,0.0,0.0,0.0,1951
4,1951-01-01,56386,2930,10345,3715,0.0,0.0,0.0,1951


# Append a "Y" suffix to the year labels '2001' ... '2018' in DATE;
# every other label is left unchanged.
df["DATE"] = df["DATE"].replace(
    [str(year) for year in range(2001, 2019)],
    [str(year) + 'Y' for year in range(2001, 2019)],
)

In [19]:
df.head()

Unnamed: 0,Time,Stid,Lat,Lon,Height,rain,rain208,rain820,DATE
0,1951-01-01,50527,4913,11945,6766,0.0,0.0,0.0,1951
1,1951-01-01,56571,2753,10218,15907,0.0,0.0,0.0,1951
2,1951-01-01,58634,2840,11815,936,0.0,0.0,0.0,1951
3,1951-01-01,56492,2849,10432,2750,0.0,0.0,0.0,1951
4,1951-01-01,56386,2930,10345,3715,0.0,0.0,0.0,1951


In [20]:
# Save the frame with the added DATE column; index=True also writes the row
# index as the first column of the CSV.
df.to_csv("china_year_read-before.csv",index=True,sep=',')

In [21]:
def decode_rain_codes(values):
    """Remove the 30000 / 31000 / 32000 / 32700 offsets from one column.

    Mapping (identical to the nested conditional it replaces):
      value < 30000           -> value
      30000 <= value < 31000  -> value - 30000
      31000 <= value < 32000  -> value - 31000
      32000 <= value < 32700  -> value - 32000
      otherwise               -> value - 32700

    NOTE(review): the offsets presumably flag special precipitation types
    in the station data set -- confirm against the data documentation.

    Parameters
    ----------
    values : pandas.Series
        One numeric column of encoded precipitation values.

    Returns
    -------
    pandas.Series
        The decoded column, same index as ``values``.
    """
    raw = np.asarray(values)
    # np.select takes the first matching condition, like the if/else chain.
    offsets = np.select(
        [raw < 30000, raw < 31000, raw < 32000, raw < 32700],
        [0, 30000, 31000, 32000],
        default=32700,
    )
    return values - offsets


# Column-wise, vectorised decoding; replaces the element-wise
# DataFrame.applymap call, which is deprecated in recent pandas releases.
df[['rain','rain208','rain820']] = df[['rain','rain208','rain820']].apply(decode_rain_codes)

In [22]:
# Save the frame after the rain columns were decoded; the row index is not
# written (index=False).
df.to_csv("china_year_read-before-correct.csv",index=False,sep=',')

#dfpivot = pd.pivot_table(bf,index=['DATE','Stid'],aggfunc={'rain':ci_pre})

https://github.com/lucasvenez/precintcon/blob/master/R/precintcon.fd.r
https://github.com/lucasvenez/precintcon/blob/master/R/precintcon.ln.a.r
http://www.dqkxqk.ac.cn/qhhj/qhhj/ch/html/20170304.htm