In [1]:
import csv
# Build lat_lng: a mapping from state name to {'lat': ..., 'lng': ...}, read
# from the coordinates CSV. Coordinates are kept as the strings found in the file.
lat_lng = {}
# newline='' hands newline handling to the csv module, as its documentation
# requires for file objects passed to csv.reader.
with open('Indian_States_Coordinates.csv', 'r', newline='') as file:
    for row in csv.reader(file):
        # A blank line parses as an empty list; skip it rather than fail on row[0].
        if not row:
            continue
        # Skip the column-header row ('State') and the title row.
        if row[0] in ('State', 'Indian_States_Coordinates'):
            continue
        lat_lng[row[0]] = {'lat': row[1], 'lng': row[2]}

In [2]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
import numpy as np

# HTTP stack for the Open-Meteo archive API: an on-disk cached session
# ('.cache', expire_after = -1) wrapped with up to 5 retries and a 0.2 backoff factor.
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)
url = "https://archive-api.open-meteo.com/v1/archive"

# Empty per-column accumulators, keyed in output-column order.
# "date" is typed as day-resolution datetimes; every other column starts as an
# empty default-dtype array.
daily_data = {
    "state": np.array([]),
    "date": np.empty(0, dtype = 'datetime64[D]'),
    **{
        column: np.array([])
        for column in ("weather_code", "temperature_2m_min", "temperature_2m_max", "precipitation_sum", "rain_sum", "snowfall_sum")
    },
}

In [6]:
# Download one calendar year of daily weather for every state in lat_lng and
# write it to weather_<year>.csv.
#
# Everything accumulated here lives in names created by this cell, so re-running
# it for another year starts from empty instead of appending to rows left in
# the kernel by a previous run.
year = 2023  # adjust this value; it sets both the date range and the output file name
daily_variables = ["weather_code", "temperature_2m_min", "temperature_2m_max", "precipitation_sum", "rain_sum", "snowfall_sum"]
# Shift added to the API timestamps before truncating them to a calendar date.
# NOTE(review): 5h30m looks like the IST (UTC+05:30) offset — confirm before
# adding locations in another timezone.
local_offset = np.timedelta64(5, 'h') + np.timedelta64(30, 'm')

state_frames = []
for city in lat_lng:
    params = {
        "latitude": lat_lng[city]['lat'],
        "longitude": lat_lng[city]['lng'],
        "start_date": f"{year}-01-01",
        "end_date": f"{year}-12-31",
        "daily": daily_variables,
        "timezone": "auto"
    }

    responses = openmeteo.weather_api(url, params=params)
    response = responses[0]  # only the first response is used; one location is requested per call
    daily = response.Daily()

    # One timestamp per daily interval, from Time() up to (not including) TimeEnd().
    timestamps = pd.date_range(
        start = pd.to_datetime(daily.Time(), unit = "s"),
        end = pd.to_datetime(daily.TimeEnd(), unit = "s"),
        freq = pd.to_timedelta(daily.Interval(), unit = "s"),
        inclusive = "left"
    )
    np_date = (np.array(timestamps) + local_offset).astype('datetime64[D]')

    # Variables are read by position, in the same order they were requested.
    state_columns = {"state": np.full(np_date.size, city), "date": np_date}
    for position, variable in enumerate(daily_variables):
        # float64 keeps the CSV number formatting the same as when values were
        # accumulated into default-dtype (float64) arrays.
        state_columns[variable] = daily.Variables(position).ValuesAsNumpy().astype(np.float64)
    state_frames.append(pd.DataFrame(state_columns))

# Concatenate once at the end; pd.concat rejects an empty list, so fall back to
# an empty frame that still carries the expected columns.
if state_frames:
    daily_dataframe = pd.concat(state_frames, ignore_index = True)
else:
    daily_dataframe = pd.DataFrame(columns = ["state", "date", *daily_variables])
daily_dataframe.to_csv(f'weather_{year}.csv', index=False)

In [4]:
# Combine the per-year exports weather_1998.csv ... weather_2023.csv into one
# frame ordered by state, then date.
first_year, last_year = 1998, 2023  # inclusive range of yearly files to load
yearly_frames = [
    pd.read_csv(f'weather_{file_year}.csv')
    for file_year in range(first_year, last_year + 1)
]
df = pd.concat(yearly_frames, ignore_index = True)
# Sort, then renumber the index so it follows the sorted order.
df = df.sort_values(by = ['state', 'date']).reset_index(drop = True)

In [5]:
# Tag every daily row with its calendar year and a season label, then average
# the temperature / precipitation columns per (state, year, season) and export
# the summary to Excel.
months_by_season = {
    'Winter': [12, 1, 2],
    'Summer': [3, 4, 5, 6, 7, 8, 9],
    'Autumn': [10, 11],
}
averaged_columns = ['temperature_2m_min', 'temperature_2m_max', 'precipitation_sum', 'rain_sum', 'snowfall_sum']

df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year

# Rows whose month matches no season keep the empty-string label.
df['season'] = ''
for season_label, season_months in months_by_season.items():
    in_season = df['date'].dt.month.isin(season_months)
    df.loc[in_season, 'season'] = season_label

# The season is derived from the calendar year of each row, so a 'Winter' group
# holds that year's January, February and December rows.
grouped = df.groupby(['state', 'year', 'season'])
result = grouped[averaged_columns].mean().reset_index()
result.to_excel('historical_weather.xlsx', index = False)