In [7]:
import pandas as pd
import numpy as np
from pathlib import Path

In [60]:
def load_data(file: Path, **kwargs):
    # reads data into df then and prints description and info
    if file.suffix == '.xlsx' or file.suffix == '.xls':
        df = pd.read_excel(file, **kwargs)
    elif file.suffix == '.csv':
        df = pd.read_csv(file, **kwargs)
    else:
        print(file.suffix)
        raise NotImplementedError

    print(f"===== df.info output =====")
    df.info()
    print(f"\n\n===== df.describe output =====\n", df.describe())
    return df

In [61]:
cwd = Path.cwd()  # reads current working directory to simplify working with files
weather_df = load_data(cwd / "data" / "ICRISAT Weather 1978 to 2018.xlsx")

===== df.info output =====
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14853 entries, 0 to 14852
Data columns (total 15 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Station    14853 non-null  object        
 1   Date       14853 non-null  datetime64[ns]
 2   MaxT       14853 non-null  float64       
 3   MinT       14853 non-null  float64       
 4   RH1        14853 non-null  int64         
 5   RH2        14853 non-null  float64       
 6   Wind       14853 non-null  float64       
 7   Rain       14853 non-null  float64       
 8   SSH        14853 non-null  float64       
 9   Evap       14853 non-null  float64       
 10  Radiation  14852 non-null  float64       
 11  FAO56_ET   14853 non-null  float64       
 12  Lat        14853 non-null  float64       
 13  Lon        14853 non-null  float64       
 14  Cum_Rain   14853 non-null  float64       
dtypes: datetime64[ns](1), float64(12), int64(1), object(1)
memor

In [63]:
drought_df = load_data(cwd / "data" / "data_17.625_78.375.csv", header=None,
                       delim_whitespace=True, names=['year', 'month', 'idi'])

===== df.info output =====
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 792 entries, 0 to 791
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    792 non-null    int64  
 1   month   792 non-null    int64  
 2   idi     792 non-null    float64
dtypes: float64(1), int64(2)
memory usage: 18.7 KB


===== df.describe output =====
              year       month           idi
count   792.00000  792.000000  7.920000e+02
mean   1983.50000    6.500000 -1.055556e-07
std      19.06241    3.454234  9.999984e-01
min    1951.00000    1.000000 -2.983100e+00
25%    1967.00000    3.750000 -7.323450e-01
50%    1983.50000    6.500000 -2.742200e-03
75%    2000.00000    9.250000  7.447425e-01
max    2016.00000   12.000000  2.365900e+00


In [108]:
ddf = (drought_df
       .assign(change_sign=lambda x: np.sign(x.idi).diff().ne(0),
               is_pos=lambda x: x.idi.gt(0),
               ds =lambda x: x.idi.gt(0).astype(int).replace(0, -1),
               days = lambda x: abs(x.groupby((x.ds != x.ds.shift()).cumsum()).cumsum().loc[:, 'ds'])
               )
       )

In [109]:
ddf

Unnamed: 0,year,month,idi,change_sign,is_pos,ds,days
0,1951,1,-0.74581,True,False,-1,1
1,1951,2,-0.75763,False,False,-1,2
2,1951,3,-0.60874,False,False,-1,3
3,1951,4,-0.41773,False,False,-1,4
4,1951,5,-0.28236,False,False,-1,5
...,...,...,...,...,...,...,...
787,2016,8,-0.42398,False,False,-1,3
788,2016,9,0.70726,True,True,1,1
789,2016,10,1.39290,False,True,1,2
790,2016,11,0.79214,False,True,1,3


In [84]:
ddf.groupby((ddf['_'] != ddf['_'].shift()).cumsum()).cumsum()

KeyError: '_'

In [23]:
(drought_df
 .groupby(drought['idi'].eq(0).cumsum()).cumcount()
 )

# df.groupby((df['x_days_since_sign_change'] != df['x_days_since_sign_change'].shift()).cumsum()).cumsum()


0        0
1        1
2        2
3        3
4        4
      ... 
787    787
788    788
789    789
790    790
791    791
Length: 792, dtype: int64

In [20]:
ddf

Unnamed: 0,year,month,idi,change_sign,is_pos
0,1951,1,-0.74581,1,False
1,1951,2,-0.75763,2,False
2,1951,3,-0.60874,3,False
3,1951,4,-0.41773,4,False
4,1951,5,-0.28236,5,False
...,...,...,...,...,...
787,2016,8,-0.42398,788,False
788,2016,9,0.70726,789,True
789,2016,10,1.39290,790,True
790,2016,11,0.79214,791,True
