## Download IMD gridded temperature and rainfall data
https://www.imdpune.gov.in/Clim_Pred_LRF_New/Grided_Data_Download.html

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import os
import imdlib as imd

### Temperature
#### Download

In [None]:
def download_temp_IMD(var, start, end):
    """Download yearly IMD gridded temperature files by driving the download page in Chrome.

    For every year from ``start`` to ``end`` (both inclusive) the IMD
    download page is loaded, the year is selected in the drop-down that
    belongs to ``var`` and the corresponding download button is clicked.
    Chrome's default download directory is set to ``./<var>`` (made
    absolute).  After the last click the function sleeps 60 seconds
    (presumably so pending downloads can finish) and then closes the browser.

    Parameters
    ----------
    var : str
        ``'tmax'`` or ``'tmin'``.
    start, end : int
        First and last year to request.  Nothing happens if ``start > end``.

    Raises
    ------
    ValueError
        If ``var`` is neither ``'tmax'`` nor ``'tmin'``.
    """
    # var -> (index of its form among the page's 'form-inline' elements,
    #         id of the year drop-down inside that form)
    page_layout = {'tmax': (3, 'maxtemp'), 'tmin': (4, 'mintemp')}
    if var not in page_layout:
        raise ValueError("var must be 'tmax' or 'tmin', got %r" % (var,))
    form_index, select_id = page_layout[var]

    # Nothing to download: do not start a browser just to wait and quit.
    if start > end:
        return

    import os
    import time

    from selenium import webdriver
    from selenium.webdriver.support.ui import Select
    from selenium.webdriver.common.by import By
    from webdriver_manager.chrome import ChromeDriverManager

    options = webdriver.ChromeOptions()
    # An absolute path is used so the target does not depend on how Chrome
    # resolves a relative download directory.
    prefs = {"download.default_directory": os.path.abspath("./%s" % var)}
    options.add_experimental_option("prefs", prefs)

    # NOTE(review): the positional driver path and `chrome_options=` are
    # legacy Selenium call forms - confirm the installed version accepts them.
    driver = webdriver.Chrome(ChromeDriverManager().install(), chrome_options=options)
    try:
        for year in range(start, end + 1):
            driver.get("https://www.imdpune.gov.in/Clim_Pred_LRF_New/Grided_Data_Download.html")

            forms = driver.find_elements(By.CLASS_NAME, value='form-inline')
            temp_form = forms[form_index]

            year_select = Select(temp_form.find_element(by='id', value=select_id))
            year_select.select_by_visible_text('%d' % year)

            # The download button is picked by position (form_index - 2)
            # among all buttons matched by this XPath.
            buttons = temp_form.find_elements(
                By.XPATH, "//form[@name='rain']//input[@class='btn btn-success']"
            )
            buttons[form_index - 2].click()

        time.sleep(60)
    finally:
        # Always release the browser, even if a page interaction failed.
        driver.quit()

In [None]:
# download_temp_IMD(var='tmax', start=1951, end=2020)
# download_temp_IMD(var='tmin', start=1951, end=2020)

#### Rename files

In [None]:
# base = './tmax/csv'
# for file in os.listdir(base):
# #     print(file)
#     os.rename('%s%s'%(base, file), '%s%s.GRD'%(base, file.split('.')[0][-4:]))

# base = './tmin/csv'
# for file in os.listdir(base):
# #     print(file)
#     os.rename('%s%s'%(base, file), '%s%s.GRD'%(base, file.split('.')[0][-4:]))

#### Convert to csv

In [None]:
def toCSV(var, start, end):
    """Convert already-downloaded yearly IMD binary grids into one CSV file per year.

    The grids for ``start``..``end`` (inclusive) are opened with
    ``imd.open_data(var, start, end, 'yearwise', './<var>/grd/')`` and written
    to ``./<var>/csv/<var>_<year>.csv``; both directories must already exist.

    Each CSV has a header row ``X,Y,1,2,...,<days in year>`` followed by one
    row per grid cell: longitude, latitude, then one value per day.  Missing
    values (``99.9000015258789`` or ``-999`` in the source array) are written
    as ``-9999``.  Every row ends with a trailing comma, which is kept so the
    output matches files produced by earlier runs.

    Parameters
    ----------
    var : str
        ``'rain'`` (0.25 degree grid), ``'tmax'`` or ``'tmin'`` (1 degree grid).
    start, end : int
        First and last year to convert.

    Raises
    ------
    ValueError
        If ``var`` is not one of the supported variables.
    """
    import calendar

    # var -> (grid spacing in degrees, cells along x, cells along y,
    #         starting longitude, starting latitude).
    # Starting coordinates are taken from the control file (.ctl).
    grids = {
        'rain': (0.25, 135, 129, 66.5, 6.5),
        'tmax': (1, 31, 31, 67.5, 7.5),
        'tmin': (1, 31, 31, 67.5, 7.5),
    }
    if var not in grids:
        raise ValueError("var must be 'rain', 'tmax' or 'tmin', got %r" % (var,))
    grid_size, x_count, y_count, x, y = grids[var]

    file_dir = './%s/grd/' % var
    data = imd.open_data(var, start, end, 'yearwise', file_dir)
    # Indexed below as [day, x index, y index].
    np_array = data.data

    first_day = 0  # index of 1 January of the current year along the day axis
    for year in range(start, end + 1):
        days = 366 if calendar.isleap(year) else 365
        last_day = first_day + days

        # `with` guarantees the file is flushed and closed for every year.
        with open("./%s/csv/%s_%s.csv" % (var, var, year), 'w') as f:
            header = ["X", "Y"] + [str(d) for d in range(1, days + 1)]
            f.write(",".join(header) + ",\n")

            for j in range(y_count):
                for i in range(x_count):
                    cells = [str((i * grid_size) + x), str((j * grid_size) + y)]
                    for k in range(first_day, last_day):
                        val = np_array[k, i, j]
                        if val == 99.9000015258789 or val == -999:
                            cells.append(str(-9999))
                        else:
                            cells.append(str(val))
                    f.write(",".join(cells) + ",\n")

        first_day = last_day
        print("File for " + str(year) + "_" + str(var) + " is saved")
    print("CSV conversion successful !")

In [None]:
# toCSV('tmin', 1951, 2020)
# toCSV('tmax', 1951, 2020)

#### Convert to netCDF

In [None]:
# Load the daily 1951-2020 grids from the yearly binary files.
data_tmax = imd.open_data('tmax', 1951, 2020, 'yearwise', './tmax/grd/').data
# The tmin files are opened with the 'tmin' variable name, matching how toCSV
# passes the same variable for both the name and the directory.
# NOTE(review): confirm against the on-disk layout imdlib expects under file_dir.
data_tmin = imd.open_data('tmin', 1951, 2020, 'yearwise', './tmin/grd/').data

In [None]:
def _as_temperature_grid(raw):
    """Wrap a raw daily array as a labelled (time, lat, lon) DataArray.

    The last two axes of ``raw`` are swapped before labelling, the time axis
    is daily starting on 1951-01-01, and the two missing-value markers
    (99.9000015258789 and -999.) are replaced by NaN.
    """
    n_days = raw.shape[0]
    grid = xr.DataArray(
        raw.transpose(0, 2, 1),
        dims=['time', 'lat', 'lon'],
        coords={
            'time': pd.to_datetime('1951-01-01') + pd.to_timedelta(np.arange(n_days), 'D'),
            'lat': np.arange(7.5, 7.5+31*1, 1),
            'lon': np.arange(67.5, 67.5+31*1, 1),
        },
    )
    # A cell is kept only if it matches neither missing-value marker.
    is_valid = (grid != 99.9000015258789) & (grid != -999.)
    return grid.where(is_valid)


ds_tmax = _as_temperature_grid(data_tmax)
ds_tmin = _as_temperature_grid(data_tmin)

In [None]:
# Write one netCDF file per calendar year and variable.  The list of years
# comes from the tmax time axis and is reused for tmin.
for year in np.unique(ds_tmax.time.dt.year.values):
    tmax_year = ds_tmax.sel(time=ds_tmax.time.dt.year == year)
    tmax_year.rename('tmax').to_netcdf('./tmax/netcdf/tmax_%s.nc' % year)

    tmin_year = ds_tmin.sel(time=ds_tmin.time.dt.year == year)
    tmin_year.rename('tmin').to_netcdf('./tmin/netcdf/tmin_%s.nc' % year)

In [None]:
# Quick visual check: time-mean of tmin over all exported yearly files.
tmin_all_years = xr.open_mfdataset('./tmin/netcdf/*.nc')
tmin_all_years.tmin.mean('time').plot()

### Rainfall

#### Download

In [None]:
def download_rainfall(start, end):
    """Download yearly IMD 0.25-degree rainfall files by driving the download page in Chrome.

    For every year from ``start`` to ``end`` (both inclusive, consistent with
    ``download_temp_IMD``) the IMD download page is loaded, the year is
    selected in the ``RF25`` drop-down and the form's download button is
    clicked.  Chrome's default download directory is set to
    ``./rain/netcdf/`` (made absolute).  After the last click the function
    sleeps 60 seconds (presumably so pending downloads can finish) and then
    closes the browser.

    Parameters
    ----------
    start, end : int
        First and last year to request.  Nothing happens if ``start > end``.
    """
    # Nothing to download: do not start a browser just to wait and quit.
    if start > end:
        return

    # These names were previously only imported inside download_temp_IMD,
    # so calling this function raised NameError.
    import os
    import time

    from selenium import webdriver
    from selenium.webdriver.support.ui import Select
    from selenium.webdriver.common.by import By
    from webdriver_manager.chrome import ChromeDriverManager

    options = webdriver.ChromeOptions()
    # An absolute path is used so the target does not depend on how Chrome
    # resolves a relative download directory.
    prefs = {"download.default_directory": os.path.abspath("./rain/netcdf/")}
    options.add_experimental_option("prefs", prefs)

    # NOTE(review): the positional driver path and `chrome_options=` are
    # legacy Selenium call forms - confirm the installed version accepts them.
    driver = webdriver.Chrome(ChromeDriverManager().install(), chrome_options=options)
    try:
        for year in range(start, end + 1):
            driver.get("https://www.imdpune.gov.in/Clim_Pred_LRF_New/Grided_Data_Download.html")

            for form in driver.find_elements(By.CLASS_NAME, value='form-inline'):
                # Only the form named 'RF25' is acted on.
                if form.get_attribute('name') != 'RF25':
                    continue

                year_select = Select(form.find_element(by='id', value='RF25'))
                year_select.select_by_visible_text('%d' % year)

                button = form.find_element(
                    By.XPATH, "//form[@name='RF25']//input[@class='btn btn-success']"
                )
                button.click()

        time.sleep(60)
    finally:
        # Always release the browser, even if a page interaction failed.
        driver.quit()

#### Rename and clean files

In [7]:
# Normalise every raw yearly rainfall file to a DataArray named 'rainfall'
# with dimensions time/lat/lon and save it as ./rain/netcdf/rainfall_<year>.nc.
for file in os.listdir('./rain/raw_netcdfs/'):
    # The last four characters of the file stem are the year.
    year_str = file.split('.')[0][-4:]
    year = int(year_str)
    print(year)

    # The context manager closes each raw file once its cleaned copy has been
    # written, instead of leaving one open handle per year.
    with xr.open_dataset('./rain/raw_netcdfs/%s' % file) as raw:
        cleaned = raw

        if year == 2005:
            # The 2005 file gets explicit 0.25-degree coordinates and has its
            # zero rainfall values masked out.
            # NOTE(review): presumably zero acts as the fill value in this file - confirm.
            cleaned['lon'] = np.arange(66.5, 100.25, 0.25)
            cleaned['lat'] = np.arange(6.5, 38.75, 0.25)
            cleaned = cleaned.where(cleaned['rainfall'] != 0)

        # Some files use upper-case dimension names.
        if 'TIME' in cleaned.dims:
            cleaned = cleaned.rename({'TIME': 'time', 'LONGITUDE': 'lon', 'LATITUDE': 'lat'})

        # The data variable is named either 'RAINFALL' or 'rainfall'.
        if 'RAINFALL' in cleaned.keys():
            rain = cleaned['RAINFALL']
        else:
            rain = cleaned['rainfall']
        rain = rain.rename('rainfall')

        rain.to_netcdf('./rain/netcdf/rainfall_%s.nc' % year_str)

1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006




2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021


In [8]:
def preprocess(ds):
    """Print the last value of ``ds.lon`` and return ``ds`` unchanged.

    Debugging hook for inspecting the longitude extent of a dataset.  The
    variable selection and TIME/LATITUDE/LONGITUDE renaming that used to be
    sketched here (commented out) is not performed by this function.
    """
    last_lon = ds.lon.values[-1]
    print(last_lon)
    return ds

In [10]:
# Open all cleaned yearly rainfall files as a single dataset.
rainfall_glob = './rain/netcdf/*.nc'
ds = xr.open_mfdataset(rainfall_glob)

In [30]:
# Maximum rainfall value per grid cell within each 10-year resampling bin.
ten_year_bins = ds['rainfall'].resample(time='10Y')
decadal = ten_year_bins.max()
decadal

Unnamed: 0,Array,Chunk
Bytes,1.73 MiB,136.05 kiB
Shape,"(13, 129, 135)","(1, 129, 135)"
Count,673 Tasks,13 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.73 MiB 136.05 kiB Shape (13, 129, 135) (1, 129, 135) Count 673 Tasks 13 Chunks Type float64 numpy.ndarray",135  129  13,

Unnamed: 0,Array,Chunk
Bytes,1.73 MiB,136.05 kiB
Shape,"(13, 129, 135)","(1, 129, 135)"
Count,673 Tasks,13 Chunks
Type,float64,numpy.ndarray


In [None]:
# Draw one map per 10-year bin of `decadal`.  The previous call built a bare
# FacetGrid over the full-resolution `ds` time axis (one facet per time step)
# and never mapped a plotting function onto it.
decadal.plot(col='time', col_wrap=4)