In [2]:
import math
import os
from datetime import datetime, timezone

import pandas as pd
import pytz
import requests

# OpenWeatherMap API key. Set OPENWEATHER_API_KEY in the environment to supply
# it without editing the notebook.
# NOTE(review): the literal fallback is a credential committed in this notebook;
# rotate it and remove the fallback once the environment variable is provisioned.
API_KEY = os.environ.get("OPENWEATHER_API_KEY") or "492aa1d288cfdf972a104e0a55c35b68"

# HTTPS endpoints: the key is sent as the `appid` query parameter, so plain HTTP
# would expose it on the wire.
BASE_URL_FORECAST = "https://api.openweathermap.org/data/2.5/forecast"
BASE_URL_CURRENT = "https://api.openweathermap.org/data/2.5/weather"

# Cities to crawl; each name is passed as the `q` query parameter.
city_list = ["Delhi","Mumbai","Bangalore","Chennai","Kolkata","Hyderabad","Pune",
             "Jaipur","Lucknow","Surat","Kanpur","Nagpur","Indore","Bhopal",
             "Patna","Vadodara","Ludhiana","Agra","Varanasi"]

# Shared Asia/Kolkata tzinfo; the helpers and the crawl loop below use it to
# convert UTC timestamps to Indian local time.
india_tz = pytz.timezone("Asia/Kolkata")

def to_date_ist(unix_time):
    """Return the Asia/Kolkata calendar date for a Unix timestamp.

    Args:
        unix_time: Seconds since the Unix epoch.

    Returns:
        A ``datetime.date`` in the ``india_tz`` time zone.
    """
    utc_moment = datetime.fromtimestamp(unix_time, tz=timezone.utc)
    local_moment = utc_moment.astimezone(india_tz)
    return local_moment.date()

def to_time_ist(unix_time):
    """Return the Asia/Kolkata wall-clock time for a Unix timestamp as "HH:MM".

    Args:
        unix_time: Seconds since the Unix epoch.

    Returns:
        A 24-hour ``"%H:%M"`` string in the ``india_tz`` time zone.
    """
    utc_moment = datetime.fromtimestamp(unix_time, tz=timezone.utc)
    local_moment = utc_moment.astimezone(india_tz)
    return local_moment.strftime("%H:%M")

# Crawl current weather + 3-hourly forecast for every city and collapse the
# forecast into one summary row per (city, IST calendar date).

# requests applies no timeout by default; without one a stalled connection
# would hang the cell indefinitely.
REQUEST_TIMEOUT_S = 10


def _fetch_json(url, params):
    """GET `url` and return ``(ok, payload)``.

    ``ok`` is True only for an HTTP 200 response whose body decodes to a JSON
    object. Network failures and undecodable bodies are reported as
    ``(False, {"error": ...})`` so one bad city does not abort the whole crawl.
    """
    try:
        response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT_S)
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        return False, {"error": str(exc)}
    return response.status_code == 200 and isinstance(payload, dict), payload


def _circular_mean_deg(directions):
    """Mean of compass bearings in degrees, in [0, 360); None if undefined.

    Bearings wrap at 360, so an arithmetic mean is wrong (350 and 10 would
    average to 180). The bearings are averaged as unit vectors instead.
    """
    angles = [math.radians(float(value)) for value in directions.dropna()]
    if not angles:
        return None
    sin_sum = sum(math.sin(angle) for angle in angles)
    cos_sum = sum(math.cos(angle) for angle in angles)
    if math.hypot(sin_sum, cos_sum) < 1e-9:
        # Opposing bearings cancel out: there is no meaningful mean direction.
        return None
    # Rounding absorbs float noise so values a hair below 0 wrap to 0.0, not 360.0.
    return round(math.degrees(math.atan2(sin_sum, cos_sum)), 2) % 360


def _mean_temp(day_rows, hour_mask):
    """Mean `temp` over the rows selected by `hour_mask`; None if none match."""
    selected = day_rows.loc[hour_mask, "temp"]
    return selected.mean() if not selected.empty else None


def _most_common(values):
    """Most frequent value of a Series (first one on ties); None if there is none."""
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else None


all_forecast = []

for city in city_list:
    # Both endpoints take the same query parameters.
    params = {"q": city, "appid": API_KEY, "units": "metric"}

    # NOTE(review): sunrise/sunset come from the current-weather endpoint and are
    # copied onto every forecast date of the city, not computed per date.
    current_ok, data_current = _fetch_json(BASE_URL_CURRENT, params)
    if current_ok and "sys" in data_current:
        sunrise = to_time_ist(data_current["sys"]["sunrise"])
        sunset = to_time_ist(data_current["sys"]["sunset"])
    else:
        sunrise, sunset = None, None

    forecast_ok, data = _fetch_json(BASE_URL_FORECAST, params)
    if not (forecast_ok and data.get("list")):
        # Also covers an empty "list", which would otherwise fail in groupby.
        print(f"Failed for {city}: {data}")
        continue

    # One timestamp per city so crawl_date and crawl_time can never disagree
    # (two separate now() calls could straddle midnight).
    crawled_at = datetime.now(india_tz)
    crawl_date = crawled_at.strftime("%Y-%m-%d")
    crawl_time = crawled_at.strftime("%H:%M")

    records = []
    for item in data["list"]:
        local_dt = datetime.fromtimestamp(item["dt"], tz=timezone.utc).astimezone(india_tz)
        records.append({
            "dt": local_dt.date(),
            "hour": local_dt.hour,
            "temp": item["main"]["temp"],
            "feels_like": item["main"]["feels_like"],
            "humidity": item["main"]["humidity"],
            "pressure": item["main"]["pressure"],
            "wind_speed": item["wind"]["speed"],
            "wind_deg": item["wind"].get("deg"),
            "clouds": item["clouds"]["all"],
            "weather_main": item["weather"][0]["main"],
            "weather_desc": item["weather"][0]["description"],
            "pop": item.get("pop", 0) * 100,  # scale the API value by 100 for the pop(%) column
            "rain": item.get("rain", {}).get("3h", 0),
        })
    df = pd.DataFrame(records)

    for date, grp in df.groupby("dt"):
        hours = grp["hour"]
        all_forecast.append({
            "crawl_date": crawl_date,
            "crawl_time": crawl_time,
            "city": city,
            "forecast_date": str(date),
            "sunrise": sunrise,
            "sunset": sunset,
            "temp_min(°C)": grp["temp"].min(),
            "temp_max(°C)": grp["temp"].max(),
            "morning_temp(°C)": _mean_temp(grp, hours.between(6, 11)),
            "day_temp(°C)": _mean_temp(grp, hours.between(12, 17)),
            "evening_temp(°C)": _mean_temp(grp, hours.between(18, 21)),
            # Night covers both ends of the same calendar date (00-05 and 22-23).
            "night_temp(°C)": _mean_temp(grp, (hours >= 22) | (hours <= 5)),
            "pressure(hPa)": grp["pressure"].mean(),
            "humidity(%)": grp["humidity"].mean(),
            "wind_speed(m/s)": grp["wind_speed"].mean(),
            "wind_deg(°)": _circular_mean_deg(grp["wind_deg"]),
            "cloudiness(%)": grp["clouds"].mean(),
            "weather_main": _most_common(grp["weather_main"]),
            "weather_desc": _most_common(grp["weather_desc"]),
            "pop(%)": grp["pop"].mean(),
            "rain(mm)": grp["rain"].sum(),
        })

df_forecast = pd.DataFrame(all_forecast)
display(df_forecast)


StatementMeta(, 32d73518-325c-492c-9e6c-77dbb50c36f9, 4, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, a67bf453-80f0-427a-98d1-36543a1dc830)

In [3]:
import pandas as pd
from datetime import datetime

# Persist df_forecast (built by the forecast cell above) as a timestamped CSV.
india_tz = pytz.timezone('Asia/Kolkata')
today_str = datetime.now(india_tz).strftime("%Y-%m-%d %H:%M:%S")

# Save to Lakehouse Files with the IST crawl timestamp in the file name.
file_path = f"/lakehouse/default/Files/forecast_data_bronze/forecast_data {today_str}.csv"
# to_csv does not create missing parent folders, so make sure the target exists
# (for example on the first run against a fresh lakehouse).
os.makedirs(os.path.dirname(file_path), exist_ok=True)
df_forecast.to_csv(file_path, index=False)

print(f"Saved: {file_path}")

StatementMeta(, 32d73518-325c-492c-9e6c-77dbb50c36f9, 5, Finished, Available, Finished)

Saved: /lakehouse/default/Files/forecast_data_bronze/forecast_data 2025-09-05 18:44:38.csv
