In [7]:
import os
from datetime import datetime, timezone

import pandas as pd
import pytz
import requests

# Cell purpose: fetch the OpenWeather forecast for each city and keep only the
# forecast slots whose IST calendar date is today, one row per (city, slot).

# The API key is read from the environment so the secret never lives in the
# notebook source or in its saved outputs.
API_KEY = os.environ.get("OPENWEATHER_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "OPENWEATHER_API_KEY is not set; define it before running this cell."
    )
BASE_URL = "https://api.openweathermap.org/data/2.5/forecast"

cities = [
    "Delhi","Mumbai","Bangalore","Chennai","Kolkata","Hyderabad","Pune",
    "Jaipur","Lucknow","Surat","Kanpur","Nagpur","Indore","Bhopal",
    "Patna","Vadodara","Ludhiana","Agra","Varanasi"
]

india_tz = pytz.timezone("Asia/Kolkata")
# Read the clock once so crawl_time and today_str cannot disagree when the cell
# happens to run across midnight IST.
now_ist = datetime.now(india_tz)
crawl_time = now_ist.strftime("%Y-%m-%d %H:%M")
today_str = now_ist.strftime("%Y-%m-%d")   # today's date in IST

rows = []

# One session for all cities so the HTTPS connection is reused.
with requests.Session() as http:
    for city in cities:
        try:
            r = http.get(
                BASE_URL,
                params={"q": city, "appid": API_KEY, "units": "metric"},
                timeout=10,
            )

            # Check the status before trusting the body: an error response is
            # not guaranteed to be JSON.
            if r.status_code != 200:
                try:
                    detail = r.json()
                except ValueError:
                    detail = r.text[:200]
                print(f"[WARN] {city}: {detail}")
                continue

            data = r.json()
            # Prefer the name the API resolved; fall back to the query string.
            city_name = (data.get("city") or {}).get("name", city)
            forecasts = data["list"]

            for f in forecasts:
                # Convert the forecast's unix timestamp to IST.
                dt_ist = datetime.fromtimestamp(f["dt"], tz=timezone.utc).astimezone(india_tz)
                dt_date = dt_ist.strftime("%Y-%m-%d")

                # Keep only the slots that fall on today's IST calendar date.
                if dt_date != today_str:
                    continue

                main = f["main"]
                # Tolerate a missing/empty "weather" list instead of aborting
                # the remaining slots of this city.
                weather = (f.get("weather") or [{}])[0]
                wind = f.get("wind") or {}
                clouds = f.get("clouds") or {}

                rows.append({
                    "crawl_time": crawl_time,
                    "city": city_name,
                    "forecast_time": dt_ist.strftime("%Y-%m-%d %H:%M:%S"),
                    "temp_c": main["temp"],
                    "feels_like_c": main["feels_like"],
                    "temp_min_c": main["temp_min"],
                    "temp_max_c": main["temp_max"],
                    "pressure_hpa": main["pressure"],
                    "humidity_pct": main["humidity"],
                    "wind_speed_ms": wind.get("speed"),
                    "wind_deg": wind.get("deg"),
                    "cloudiness_pct": clouds.get("all"),
                    "weather_main": weather.get("main"),
                    "weather_desc": weather.get("description"),
                    "pop": f.get("pop", 0),
                    "rain_3h_mm": (f.get("rain") or {}).get("3h", 0)
                })

        except Exception as e:
            # Best effort per city: report and move on. repr() keeps the
            # exception type visible (a bare KeyError prints only the key).
            print(f"[ERR] {city}: {e!r}")

df_weather = pd.DataFrame(rows)
display(df_weather)

StatementMeta(, 6aafe284-b1a2-4282-93a5-31ab96ce7d63, 9, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 25b282fc-c87c-4f5f-a8d5-51dc4e66b915)

In [2]:
import os
from datetime import datetime

import pandas as pd
import pytz

# Cell purpose: persist df_weather (built by the forecast cell) as a
# timestamped CSV in the lakehouse bronze folder.

india_tz = pytz.timezone('Asia/Kolkata')
# NOTE: despite its name this is a full IST timestamp, and it overwrites the
# date-only today_str set by the forecast cell.
today_str = datetime.now(india_tz).strftime("%Y-%m-%d %H:%M:%S")

# NOTE(review): the file name contains a space and colons; confirm that every
# downstream reader of this folder accepts such names before relying on it.
file_path = f"/lakehouse/default/Files/hourly_data_bronze/hourly_data {today_str}.csv"

if df_weather.empty:
    # Every request failed or no slot matched today: do not drop an empty,
    # header-less file into the bronze folder.
    print(f"[WARN] df_weather is empty; nothing written to {file_path}")
else:
    # to_csv does not create missing parent folders.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    df_weather.to_csv(file_path, index=False)
    print(f"Saved: {file_path}")

StatementMeta(, 1d71932e-b9ac-488f-b002-76b02c515ae5, 4, Finished, Available, Finished)

Saved: /lakehouse/default/Files/hourly_data_bronze/hourly_data 2025-08-30 07:14:19.csv


In [9]:
import os
from datetime import datetime, timezone
from time import sleep

import pandas as pd
import pytz
import requests

# Cell purpose: fetch the complete OpenWeather forecast for each city and
# flatten it into df_fc, one row per (city, forecast slot).

# The API key is read from the environment so the secret never lives in the
# notebook source or in its saved outputs.
API_KEY = os.environ.get("OPENWEATHER_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "OPENWEATHER_API_KEY is not set; define it before running this cell."
    )
BASE_URL_FORECAST = "https://api.openweathermap.org/data/2.5/forecast"

city_list = ["Delhi","Mumbai","Bangalore","Chennai","Kolkata","Hyderabad","Pune",
             "Jaipur","Lucknow","Surat","Kanpur","Nagpur","Indore","Bhopal",
             "Patna","Vadodara","Ludhiana","Agra","Varanasi"]

session = requests.Session()
session.headers.update({"User-Agent":"weather-crawler/1.0"})
timeout = 15

india_tz = pytz.timezone("Asia/Kolkata")
crawl_time_ist = datetime.now(india_tz).strftime("%Y-%m-%d %H:%M:%S")

rows = []

for city in city_list:
    params = {"q": city, "appid": API_KEY, "units": "metric"}
    try:
        r = session.get(BASE_URL_FORECAST, params=params, timeout=timeout)

        # Check the status before trusting the body: an error response is not
        # guaranteed to be JSON.
        if r.status_code != 200:
            try:
                detail = r.json()
            except ValueError:
                detail = r.text[:200]
            print(f"[WARN] Forecast failed for {city}: {detail}")
            continue

        j = r.json()

        city_meta   = j.get("city", {}) or {}
        coord       = city_meta.get("coord", {}) or {}
        tz_offset   = city_meta.get("timezone")  # seconds from UTC; may be None

        for item in j.get("list", []):
            main   = item.get("main", {}) or {}
            wind   = item.get("wind", {}) or {}
            clouds = item.get("clouds", {}) or {}
            wx0    = (item.get("weather") or [{}])[0]
            rain   = item.get("rain", {}) or {}

            # OpenWeather's dt_txt is UTC, not local time. Derive the timestamp
            # from the unix "dt" field and convert it to IST so it lines up
            # with crawl_time (also IST).
            dt_unix = item.get("dt")
            if dt_unix is None:
                forecast_time = None
            else:
                forecast_time = (
                    datetime.fromtimestamp(dt_unix, tz=timezone.utc)
                    .astimezone(india_tz)
                    .strftime("%Y-%m-%d %H:%M:%S")
                )

            rows.append({
                # lineage / meta
                "crawl_time":        crawl_time_ist,                     # when we crawled (IST)
                "city":              city_meta.get("name", city),
                "city_lat":          coord.get("lat"),
                "city_lon":          coord.get("lon"),
                "timezone_offset_s": tz_offset,

                # forecast timestamp, IST
                "forecast_time":     forecast_time,

                # weather metrics
                "temp_c":            main.get("temp"),
                "feels_like_c":      main.get("feels_like"),
                "temp_min_c":        main.get("temp_min"),
                "temp_max_c":        main.get("temp_max"),
                "pressure_hpa":      main.get("pressure"),
                "humidity_pct":      main.get("humidity"),
                "visibility_m":      item.get("visibility"),

                "wind_speed_ms":     wind.get("speed"),
                "wind_deg":          wind.get("deg"),
                "cloudiness_pct":    clouds.get("all"),

                "weather_main":      wx0.get("main"),
                "weather_desc":      wx0.get("description"),

                "pop":               item.get("pop"),
                "rain_3h_mm":        rain.get("3h")
            })

    except Exception as e:
        # Best effort per city: report and move on. repr() keeps the exception
        # type visible.
        print(f"[ERR] {city}: {e!r}")

    finally:
        # Pause after every request, including failed ones, so an error or a
        # rate-limit response is never followed by an immediate next call.
        sleep(0.3)

df_fc = pd.DataFrame(rows)
display(df_fc.head(10))

StatementMeta(, 6aafe284-b1a2-4282-93a5-31ab96ce7d63, 11, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 08e94372-df31-4c67-ac49-b8d08ded1625)