In [None]:
!pip install requests pandas python-dotenv schedule


In [None]:
from dotenv import load_dotenv
import os

# Load variables from a .env file into the process environment, then read the
# OpenWeatherMap key from it. os.getenv returns None when the variable is unset.
# NOTE(review): the cells visible below read `API_KEY` (upper case), which the
# following cell assigns; `api_key` itself is not referenced by them, so this
# cell looks like a leftover duplicate — confirm before relying on it.
load_dotenv()
api_key = os.getenv("OPENWEATHER_API_KEY")



In [None]:
import os
from dotenv import load_dotenv

load_dotenv()
API_KEY = os.getenv("OPENWEATHER_API_KEY")

# os.getenv returns None when the variable is unset. Fail here with an
# actionable message instead of a TypeError from slicing None (or a run of
# failed requests further down the notebook).
if not API_KEY:
    raise RuntimeError(
        "OPENWEATHER_API_KEY is not set. Add it to a .env file or export it "
        "in the environment before running this notebook."
    )

# Confirm the key was found without echoing any part of it: cell output is
# saved with the notebook, so printing even a prefix would leak it.
print("API Key Loaded Securely:", "*" * 4)


In [None]:
import requests

def extract_weather_data(cities, timeout=10):
    """
    Fetch current weather for a list of cities from the OpenWeatherMap API.

    Parameters
    ----------
    cities : iterable of str
        City names to look up. Names are passed as query parameters, so
        spaces and special characters (e.g. "New York") are URL-encoded.
    timeout : float, optional
        Seconds to wait for each request before giving up (default 10).

    Returns
    -------
    list of dict
        The decoded JSON payload of every request that returned HTTP 200,
        in the order the cities were given. Cities whose request failed are
        reported on stdout and left out of the result.
    """
    # HTTPS keeps the API key (sent as a query parameter) off the wire in
    # clear text.
    endpoint = "https://api.openweathermap.org/data/2.5/weather"
    weather_data = []

    for city in cities:
        try:
            response = requests.get(
                endpoint,
                params={"q": city, "appid": API_KEY, "units": "metric"},
                timeout=timeout,
            )
        except requests.RequestException as exc:
            # Report only the exception type: the message of a requests
            # exception can contain the full URL, including the API key.
            print(f"Failed to fetch data for {city} ({type(exc).__name__})")
            continue

        if response.status_code != 200:
            print(f"Failed to fetch data for {city} (HTTP {response.status_code})")
            continue

        weather_data.append(response.json())

    return weather_data

# Smoke test: fetch live data for three sample cities (this performs real HTTP
# requests). `raw_data` is reused by the transform cell further down.
cities = ["Delhi", "Mumbai", "London"]
raw_data = extract_weather_data(cities)
print(f"Fetched weather data for {len(raw_data)} cities.")


In [None]:
import pandas as pd
from datetime import datetime

def transform_weather_data(raw_data):
    """
    Flatten raw weather JSON records into a tidy Pandas DataFrame.

    Parameters
    ----------
    raw_data : iterable of dict
        Records that each provide ``main.temp``, ``main.humidity``,
        ``weather[0].description`` and ``dt``; ``name`` is optional and
        becomes None when absent.

    Returns
    -------
    pandas.DataFrame
        One row per record with the columns ``city``, ``temperature``,
        ``humidity``, ``weather`` and ``timestamp``. The columns are present
        even when ``raw_data`` is empty, so downstream code always sees the
        same schema.

    Raises
    ------
    KeyError, IndexError
        If a record lacks one of the required fields.
    """
    columns = ["city", "temperature", "humidity", "weather", "timestamp"]

    records = [
        {
            "city": entry.get("name"),
            "temperature": entry["main"]["temp"],
            "humidity": entry["main"]["humidity"],
            "weather": entry["weather"][0]["description"],
            # fromtimestamp() without a tz argument renders the epoch value in
            # the local timezone of the machine running the notebook.
            "timestamp": datetime.fromtimestamp(entry["dt"]).strftime('%Y-%m-%d %H:%M:%S'),
        }
        for entry in raw_data
    ]

    # Passing `columns` keeps the schema stable for an empty record list.
    return pd.DataFrame(records, columns=columns)

# Smoke test: build the DataFrame from the `raw_data` fetched by the extract
# cell. `weather_df` is what the load cell further down writes to disk.
weather_df = transform_weather_data(raw_data)
print(weather_df)


In [None]:
def load_weather_data(df, filename="weather_data.csv", append=False):
    """
    Save the weather DataFrame to a CSV file.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to write; the index is not written.
    filename : str, optional
        Destination path (default "weather_data.csv").
    append : bool, optional
        When False (the default) the file is replaced on every call. When
        True, rows are added to an existing non-empty file without repeating
        the header, so repeated pipeline runs accumulate history.
    """
    # Only skip the header when there is already content to append to;
    # a missing or empty file still gets a header row.
    appending = append and os.path.exists(filename) and os.path.getsize(filename) > 0

    df.to_csv(
        filename,
        mode="a" if appending else "w",
        header=not appending,
        index=False,
    )
    print(f"Data saved to {filename}")

# Smoke test: write `weather_df` to the default filename (weather_data.csv,
# resolved against the current working directory).
load_weather_data(weather_df)


In [None]:
import schedule
import time

def run_pipeline(cities=None):
    """
    Run one extract -> transform -> load cycle and print the resulting frame.

    Parameters
    ----------
    cities : list of str, optional
        Cities to fetch. Defaults to Delhi, Mumbai and London, so the
        function can still be scheduled without arguments.

    Returns
    -------
    None
        Results are written via ``load_weather_data`` and echoed to stdout;
        when no city could be fetched, only "No data fetched." is printed.
    """
    if cities is None:
        cities = ["Delhi", "Mumbai", "London"]

    print("\nRunning ETL pipeline...")
    raw_data = extract_weather_data(cities)

    # Nothing to transform or save when every request failed.
    if not raw_data:
        print("No data fetched.")
        return

    weather_df = transform_weather_data(raw_data)
    load_weather_data(weather_df)
    print(weather_df)

# Schedule the job every 1 minute. Keep the job handle so it can be removed
# again: the job lives on schedule's module-level default scheduler, so without
# cleanup every re-run of this cell would add another copy and the pipeline
# would fire several times per minute.
pipeline_job = schedule.every(1).minutes.do(run_pipeline)

# Run the scheduler for 5 minutes (demo)
start_time = time.time()
try:
    while time.time() - start_time < 300:  # 300 sec = 5 min
        schedule.run_pending()
        time.sleep(1)
finally:
    # Runs on normal completion and on a kernel interrupt alike.
    schedule.cancel_job(pipeline_job)
