In [9]:
import datetime

"""
Gathering weather data
"""
import requests
import pandas as pd

import os  # cell-local import so this cell runs on a fresh kernel

# Coordinates passed to the API as decimal degrees (the response below labels
# this point "New York").
lat, lon = 40.7128, -74.0060

# SECURITY: prefer supplying the key through the OPENWEATHER_API_KEY
# environment variable. The literal fallback is kept only so the notebook
# keeps running unchanged; a key committed to a notebook should be treated as
# leaked, rotated, and removed from source.
API_KEY = os.environ.get('OPENWEATHER_API_KEY') or 'c666368e86e0cc582edb4072c731d626'

# units=imperial makes the API report temperatures in Fahrenheit.
weather_url = f"https://api.openweathermap.org/data/2.5/weather?lat={lat}&lon={lon}&appid={API_KEY}&units=imperial"

# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() turns HTTP errors (bad key, rate limit, ...) into an
# exception instead of silently storing the error payload as weather data.
response = requests.get(weather_url, timeout=10)
response.raise_for_status()
weather_data = response.json()

weather_data

{'coord': {'lon': -74.006, 'lat': 40.7128},
 'weather': [{'id': 800,
   'main': 'Clear',
   'description': 'clear sky',
   'icon': '01d'}],
 'base': 'stations',
 'main': {'temp': 87.4,
  'feels_like': 86.67,
  'temp_min': 83.97,
  'temp_max': 89.35,
  'pressure': 1019,
  'humidity': 38,
  'sea_level': 1019,
  'grnd_level': 1018},
 'visibility': 10000,
 'wind': {'speed': 6.91, 'deg': 220},
 'clouds': {'all': 0},
 'dt': 1754342837,
 'sys': {'type': 1,
  'id': 4610,
  'country': 'US',
  'sunrise': 1754301345,
  'sunset': 1754352504},
 'timezone': -14400,
 'id': 5128581,
 'name': 'New York',
 'cod': 200}

In [1]:
import datetime

"""
Gathering weather data
"""
import requests
import pandas as pd

# NYC coordinates
lat, lon = 40.7128, -74.0060

# Query a single calendar day: two days before today (local date of the
# machine running the notebook). The date is computed here, not read from the
# delay data.
delay_date = (datetime.date.today() - datetime.timedelta(days=2)).isoformat()
start_date = delay_date
end_date = delay_date  # Same day, hourly granularity

# Open-Meteo archive API with temperature in Fahrenheit and timestamps
# reported in America/New_York local time.
weather_url = (
    f"https://archive-api.open-meteo.com/v1/archive?"
    f"latitude={lat}&longitude={lon}"
    f"&start_date={start_date}&end_date={end_date}"
    f"&hourly=temperature_2m,precipitation,snowfall,relative_humidity_2m,windspeed_10m"
    f"&temperature_unit=fahrenheit"
    f"&timezone=America/New_York"
)

# Fetch the data. The timeout prevents an indefinite hang and
# raise_for_status() surfaces HTTP errors instead of letting an error payload
# flow into the DataFrame construction below.
response = requests.get(weather_url, timeout=30)
response.raise_for_status()
weather_data = response.json()

# Fail with a readable message (rather than a bare KeyError) if the response
# does not carry the hourly block this cell depends on.
if 'hourly' not in weather_data:
    raise ValueError(f"Open-Meteo response has no 'hourly' section: {weather_data}")

# One row per hour; 'time' is parsed from ISO strings into datetimes.
weather_df = pd.DataFrame(weather_data['hourly'])
weather_df['time'] = pd.to_datetime(weather_df['time'])

weather_df

Unnamed: 0,time,temperature_2m,precipitation,snowfall,relative_humidity_2m,windspeed_10m
0,2025-08-02 00:00:00,65.9,0.0,0.0,90.0,3.6
1,2025-08-02 01:00:00,65.6,0.0,0.0,91.0,2.3
2,2025-08-02 02:00:00,64.9,0.0,0.0,93.0,2.8
3,2025-08-02 03:00:00,63.9,0.0,0.0,95.0,3.5
4,2025-08-02 04:00:00,63.6,0.0,0.0,97.0,4.8
5,2025-08-02 05:00:00,63.8,0.0,0.0,89.0,9.8
6,2025-08-02 06:00:00,62.7,0.0,0.0,84.0,10.1
7,2025-08-02 07:00:00,62.7,0.0,0.0,80.0,9.2
8,2025-08-02 08:00:00,64.1,0.0,0.0,75.0,9.4
9,2025-08-02 09:00:00,67.3,0.0,0.0,73.0,7.6


In [2]:
import datetime
import pandas as pd
from sqlalchemy import create_engine, text

import os  # cell-local imports so this cell runs on a fresh kernel
from urllib.parse import quote

# === Database config ===
# Each setting can be overridden through an environment variable; the literals
# are the original defaults.
# SECURITY: the password fallback is kept only for backward compatibility;
# credentials should not live in the notebook.
DB_USER = os.environ.get("TRAIN_DELAYS_DB_USER", "postgres")
DB_PASSWORD = os.environ.get("TRAIN_DELAYS_DB_PASSWORD", "commiteveryday")
DB_HOST = os.environ.get("TRAIN_DELAYS_DB_HOST", "localhost")
DB_PORT = os.environ.get("TRAIN_DELAYS_DB_PORT", "5432")
DB_NAME = os.environ.get("TRAIN_DELAYS_DB_NAME", "train_delays")

# === Create DB engine ===
# The password is percent-encoded so characters such as '@', '/' or ':' cannot
# corrupt the URL (a no-op for the default password).
engine = create_engine(
    f"postgresql://{DB_USER}:{quote(DB_PASSWORD, safe='')}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

create_weather_hourly = """
CREATE TABLE IF NOT EXISTS weather_hourly (
    id SERIAL PRIMARY KEY,
    time TIMESTAMP,
    temperature_f REAL,
    precipitation REAL,
    snowfall REAL,
    humidity REAL,
    windspeed REAL
);
"""

# engine.begin() commits on successful exit. With engine.connect() and no
# explicit commit, SQLAlchemy 2.x rolls the DDL back when the connection is
# closed, so the table would never actually be created by this statement.
with engine.begin() as conn:
    conn.execute(text(create_weather_hourly))
    print("Table created.")

# === Insert weather data ===
# NOTE: relies on `weather_df` produced by the Open-Meteo cell. The insert is
# append-only and the table has no uniqueness constraint on `time`, so
# re-running this cell for the same day stores duplicate rows.
if not weather_df.empty:
    # Source column -> table column.
    column_map = {
        'time': 'time',
        'temperature_2m': 'temperature_f',
        'precipitation': 'precipitation',
        'snowfall': 'snowfall',
        'relative_humidity_2m': 'humidity',
        'windspeed_10m': 'windspeed',
    }

    df_weather = weather_df[list(column_map)].rename(columns=column_map)

    df_weather.to_sql('weather_hourly', engine, if_exists='append', index=False)
    print(f"Inserted {len(df_weather)} weather records.")
else:
    print("No weather data found.")


Table created.
Inserted 24 weather records.


In [10]:
import requests
import pg8000
from datetime import datetime
import time
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class WeatherDataCollector:
    """Fetch current weather from OpenWeatherMap and store it in PostgreSQL.

    The collector requests the current-weather endpoint without a ``units``
    parameter, converts the returned temperatures from Kelvin to Fahrenheit,
    derives a few boolean flags and a 1-10 severity score, and inserts one row
    per location into the ``weather_data`` table.

    Constructing an instance connects to the database and creates the table
    if it does not exist.
    """

    def __init__(self, api_key, db_config=None):
        """Store settings and make sure the ``weather_data`` table exists.

        Args:
            api_key: OpenWeatherMap API key sent as the ``appid`` parameter.
            db_config: Optional dict with ``user``, ``password``, ``host``,
                ``port`` and ``database`` keys. When omitted (or empty), the
                default Neon configuration is used; each default can be
                overridden through a ``NEON_DB_*`` environment variable.

        Raises:
            pg8000.Error: if the database cannot be initialised.
        """
        import os  # local import keeps the class self-contained in the notebook

        self.api_key = api_key
        self.base_url = "https://api.openweathermap.org/data/2.5/weather"

        # Default Neon DB configuration.
        # SECURITY: the literal fallbacks are kept only for backward
        # compatibility. Credentials committed to a notebook should be treated
        # as leaked: rotate them and supply them via the environment.
        self.db_config = db_config or {
            'user': os.environ.get('NEON_DB_USER', 'neondb_owner'),
            'password': os.environ.get('NEON_DB_PASSWORD', 'npg_VOXZBcRohC81'),
            'host': os.environ.get(
                'NEON_DB_HOST',
                'ep-spring-truth-ae312q45-pooler.c-2.us-east-2.aws.neon.tech',
            ),
            'port': int(os.environ.get('NEON_DB_PORT', 5432)),
            'database': os.environ.get('NEON_DB_NAME', 'neondb')
        }

        self.init_database()

    def _connect(self):
        """Open a new pg8000 connection using ``self.db_config``."""
        return pg8000.connect(
            user=self.db_config['user'],
            password=self.db_config['password'],
            host=self.db_config['host'],
            port=self.db_config['port'],
            database=self.db_config['database']
        )

    def init_database(self):
        """Initialize PostgreSQL database with weather data table.

        Raises:
            pg8000.Error: re-raised after logging if the connection or the
                ``CREATE TABLE`` statement fails.
        """
        conn = None
        try:
            conn = self._connect()
            cursor = conn.cursor()

            cursor.execute('''
                CREATE TABLE IF NOT EXISTS weather_data (
                    id SERIAL PRIMARY KEY,
                    timestamp TIMESTAMP,
                    location_name TEXT,
                    latitude REAL,
                    longitude REAL,

                    -- Temperature data (in Fahrenheit)
                    temp_fahrenheit REAL,
                    feels_like_fahrenheit REAL,
                    temp_min_fahrenheit REAL,
                    temp_max_fahrenheit REAL,

                    -- Weather conditions
                    weather_main TEXT,
                    weather_description TEXT,
                    weather_id INTEGER,

                    -- Atmospheric data
                    pressure INTEGER,
                    humidity INTEGER,
                    sea_level_pressure INTEGER,
                    ground_level_pressure INTEGER,
                    visibility INTEGER,

                    -- Wind data
                    wind_speed REAL,
                    wind_direction INTEGER,
                    wind_gust REAL,

                    -- Precipitation data
                    rain_1h REAL,
                    rain_3h REAL,
                    snow_1h REAL,
                    snow_3h REAL,

                    -- Cloud data
                    cloudiness INTEGER,

                    -- Additional fields for MTA analysis
                    is_precipitation BOOLEAN,
                    is_snow BOOLEAN,
                    is_extreme_temp BOOLEAN,
                    is_high_humidity BOOLEAN,
                    is_high_wind BOOLEAN,
                    weather_severity_score INTEGER,

                    -- Timestamps
                    sunrise_time TIMESTAMP,
                    sunset_time TIMESTAMP,
                    data_timestamp TIMESTAMP
                )
            ''')

            conn.commit()
            logger.info("PostgreSQL database initialized successfully")
        except pg8000.Error as e:
            logger.error(f"Database initialization error: {e}")
            raise
        finally:
            # Close on every path so a failed CREATE does not leak the connection.
            if conn is not None:
                conn.close()

    def kelvin_to_fahrenheit(self, kelvin):
        """Convert Kelvin directly to Fahrenheit"""
        return (kelvin - 273.15) * 9/5 + 32

    def calculate_weather_severity_score(self, weather_data):
        """
        Calculate a weather severity score (1-10) for MTA delay correlation.
        Higher scores indicate more severe weather conditions.

        Args:
            weather_data: dict in the shape produced by
                ``process_weather_data``. Reads ``temp_fahrenheit``,
                ``rain_1h``, ``snow_1h``, ``wind_speed``, ``humidity`` and
                ``visibility``; missing or ``None`` values fall back to a
                neutral default (temperature falls back to 32).

        Returns:
            int between 1 and 10.
        """
        score = 1

        # Temperature extremes. `or 32` would be wrong here because 0°F is a
        # legitimate (and extreme) reading, so test for None explicitly.
        temp_f = weather_data.get('temp_fahrenheit', 32)
        if temp_f is None:
            temp_f = 32
        if temp_f < 14 or temp_f > 95:  # Extreme temperatures
            score += 3
        elif temp_f < 32 or temp_f > 86:  # Very cold/hot
            score += 2
        elif temp_f < 41 or temp_f > 77:  # Cold/warm
            score += 1

        # Precipitation: snow outranks rain; only one branch contributes.
        rain_1h = weather_data.get('rain_1h', 0) or 0
        snow_1h = weather_data.get('snow_1h', 0) or 0

        if snow_1h > 5:  # Heavy snow
            score += 3
        elif snow_1h > 0:  # Any snow
            score += 2
        elif rain_1h > 10:  # Heavy rain
            score += 2
        elif rain_1h > 2:  # Moderate rain
            score += 1

        # Wind
        wind_speed = weather_data.get('wind_speed', 0) or 0
        if wind_speed > 15:  # High wind
            score += 2
        elif wind_speed > 10:  # Moderate wind
            score += 1

        # Humidity
        humidity = weather_data.get('humidity', 0) or 0
        if humidity > 90:  # Very high humidity
            score += 1

        # Visibility (unknown is treated as fully clear)
        visibility = weather_data.get('visibility', 10000) or 10000
        if visibility < 1000:  # Poor visibility
            score += 2
        elif visibility < 5000:  # Reduced visibility
            score += 1

        return min(score, 10)  # Cap at 10

    def fetch_weather_data(self, lat, lon, location_name=None):
        """Fetch weather data from OpenWeatherMap API.

        Args:
            lat: latitude sent as the ``lat`` query parameter.
            lon: longitude sent as the ``lon`` query parameter.
            location_name: accepted for interface compatibility; not used.

        Returns:
            The decoded JSON response, or ``None`` if the request failed
            (the error is logged).
        """
        params = {
            'lat': lat,
            'lon': lon,
            'appid': self.api_key
        }

        try:
            response = requests.get(self.base_url, params=params, timeout=10)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            # NOTE(review): the exception text may include the request URL,
            # and with it the `appid` query parameter - confirm before
            # shipping these logs anywhere shared.
            logger.error(f"Error fetching weather data: {e}")
            return None

    def _kelvin_reading_to_fahrenheit(self, main, key):
        """Return ``main[key]`` converted to Fahrenheit, or None if absent."""
        kelvin = main.get(key)
        if kelvin is None:
            return None
        return self.kelvin_to_fahrenheit(kelvin)

    @staticmethod
    def _round_or_none(value, digits=2):
        """Round ``value`` to ``digits`` places, passing None through."""
        return None if value is None else round(value, digits)

    @staticmethod
    def _epoch_to_datetime(epoch_seconds):
        """Convert a Unix timestamp to a naive local datetime (None if falsy)."""
        return datetime.fromtimestamp(epoch_seconds) if epoch_seconds else None

    @staticmethod
    def _format_temp(value):
        """Format a Fahrenheit reading for the console summary."""
        return "n/a" if value is None else f"{value:.1f}°F"

    def process_weather_data(self, raw_data, location_name=None):
        """Process raw weather data into structured format.

        Args:
            raw_data: decoded JSON from ``fetch_weather_data``.
            location_name: label to store; falls back to the response's
                ``name`` field, then to ``'Unknown'``.

        Returns:
            dict whose keys match the ``weather_data`` table columns, or
            ``None`` when ``raw_data`` is empty. Readings absent from the
            response are stored as ``None`` rather than a fabricated value.
        """
        if not raw_data:
            return None

        # Extract basic info
        coord = raw_data.get('coord', {})
        main = raw_data.get('main', {})
        # `or [{}]` also covers an empty list, which would otherwise raise
        # IndexError on the [0] lookup.
        weather = (raw_data.get('weather') or [{}])[0]
        wind = raw_data.get('wind', {})
        rain = raw_data.get('rain', {})
        snow = raw_data.get('snow', {})
        clouds = raw_data.get('clouds', {})
        sys_info = raw_data.get('sys', {})  # renamed local: avoids shadowing `sys`

        # Temperature conversions. A missing reading stays None instead of
        # being converted from 0 K (-459.67°F), which would be stored as a
        # real measurement and flagged as an extreme temperature.
        temp_f = self._kelvin_reading_to_fahrenheit(main, 'temp')
        feels_like_f = self._kelvin_reading_to_fahrenheit(main, 'feels_like')
        temp_min_f = self._kelvin_reading_to_fahrenheit(main, 'temp_min')
        temp_max_f = self._kelvin_reading_to_fahrenheit(main, 'temp_max')

        processed_data = {
            'timestamp': datetime.now(),
            'location_name': location_name or raw_data.get('name', 'Unknown'),
            'latitude': coord.get('lat'),
            'longitude': coord.get('lon'),

            # Temperature
            'temp_fahrenheit': self._round_or_none(temp_f),
            'feels_like_fahrenheit': self._round_or_none(feels_like_f),
            'temp_min_fahrenheit': self._round_or_none(temp_min_f),
            'temp_max_fahrenheit': self._round_or_none(temp_max_f),

            # Weather conditions
            'weather_main': weather.get('main'),
            'weather_description': weather.get('description'),
            'weather_id': weather.get('id'),

            # Atmospheric
            'pressure': main.get('pressure'),
            'humidity': main.get('humidity'),
            'sea_level_pressure': main.get('sea_level'),
            'ground_level_pressure': main.get('grnd_level'),
            'visibility': raw_data.get('visibility'),

            # Wind
            'wind_speed': wind.get('speed'),
            'wind_direction': wind.get('deg'),
            'wind_gust': wind.get('gust'),

            # Precipitation
            'rain_1h': rain.get('1h'),
            'rain_3h': rain.get('3h'),
            'snow_1h': snow.get('1h'),
            'snow_3h': snow.get('3h'),

            # Clouds
            'cloudiness': clouds.get('all'),

            # Analysis flags (`or 0` guards against explicit nulls)
            'is_precipitation': bool(rain.get('1h', 0) or snow.get('1h', 0)),
            'is_snow': bool(snow.get('1h', 0)),
            'is_extreme_temp': temp_f is not None and (temp_f < 23 or temp_f > 95),
            'is_high_humidity': (main.get('humidity') or 0) > 85,
            'is_high_wind': (wind.get('speed') or 0) > 12,

            # Timestamps (naive datetimes in the local timezone of this process)
            'sunrise_time': self._epoch_to_datetime(sys_info.get('sunrise')),
            'sunset_time': self._epoch_to_datetime(sys_info.get('sunset')),
            'data_timestamp': self._epoch_to_datetime(raw_data.get('dt'))
        }

        # Calculate severity score
        processed_data['weather_severity_score'] = self.calculate_weather_severity_score(processed_data)

        return processed_data

    def save_to_database(self, weather_data):
        """Save processed weather data to PostgreSQL database.

        Args:
            weather_data: dict from ``process_weather_data``; its keys are
                used as column names and its values as the row.

        Returns:
            True if the row was inserted and committed, False if
            ``weather_data`` is empty or a database error occurred (logged).
        """
        if not weather_data:
            return False

        # Bound before the try so the `finally` clause is safe even when
        # pg8000.connect() itself raises (previously an UnboundLocalError
        # masked the real connection error).
        conn = None
        try:
            conn = self._connect()
            cursor = conn.cursor()

            # Column names come from the dict keys built in this class, not
            # from external input; values are passed as bound parameters.
            columns = list(weather_data.keys())
            placeholders = ', '.join('%s' for _ in columns)
            column_names = ', '.join(columns)

            query = f"INSERT INTO weather_data ({column_names}) VALUES ({placeholders})"

            cursor.execute(query, list(weather_data.values()))
            conn.commit()
            logger.info(f"Weather data saved for {weather_data['location_name']}")
            return True
        except pg8000.Error as e:
            logger.error(f"Database error: {e}")
            return False
        finally:
            if conn is not None:
                conn.close()

    def collect_weather_data(self, locations):
        """
        Collect weather data for multiple locations.

        Args:
            locations: list of tuples [(lat, lon, name), ...]

        Returns:
            list of processed dicts for the locations that were saved
            successfully. A console summary is printed for every location
            that was fetched and processed, whether or not the save worked.
        """
        results = []

        for lat, lon, name in locations:
            logger.info(f"Fetching weather data for {name}")

            # Fetch raw data
            raw_data = self.fetch_weather_data(lat, lon, name)

            if raw_data:
                # Process data
                processed_data = self.process_weather_data(raw_data, name)

                if processed_data:
                    # Save to database
                    if self.save_to_database(processed_data):
                        results.append(processed_data)

                    # Print summary
                    print(f"\n{name}:")
                    print(f"  Temperature: {self._format_temp(processed_data['temp_fahrenheit'])}")
                    print(f"  Feels like: {self._format_temp(processed_data['feels_like_fahrenheit'])}")
                    print(f"  Conditions: {processed_data['weather_description']}")
                    print(f"  Humidity: {processed_data['humidity']}%")
                    print(f"  Wind: {processed_data['wind_speed']} m/s")
                    if processed_data['rain_1h']:
                        print(f"  Rain (1h): {processed_data['rain_1h']} mm")
                    if processed_data['snow_1h']:
                        print(f"  Snow (1h): {processed_data['snow_1h']} mm")
                    print(f"  Weather Severity Score: {processed_data['weather_severity_score']}/10")

            # Be respectful to the API
            time.sleep(1)

        return results

# Example usage and NYC locations for MTA analysis
def main():
    API_KEY = 'c666368e86e0cc582edb4072c731d626'
    
    collector = WeatherDataCollector(API_KEY)
    
    # NYC area locations for MTA analysis
    nyc_locations = [
        (40.7831, -73.9712, "NYC_Manhattan"),
        (40.6782, -73.9442, "NYC_Brooklyn"), 
        (40.7282, -73.7949, "NYC_Queens"),
        (40.7505, -73.9934, "NYC_Times_Square"),
        (40.6892, -74.0445, "NYC_Lower_Manhattan")
    ]
    
    # Collect weather data
    results = collector.collect_weather_data(nyc_locations)
    
    print(f"\nCollected weather data for {len(results)} locations")
    print("Data saved to Neon PostgreSQL database")

if __name__ == "__main__":
    main()

2025-08-04 17:45:40,296 - INFO - PostgreSQL database initialized successfully
2025-08-04 17:45:40,299 - INFO - Fetching weather data for NYC_Manhattan
2025-08-04 17:45:40,966 - INFO - Weather data saved for NYC_Manhattan



NYC_Manhattan:
  Temperature: 86.8°F
  Feels like: 86.2°F
  Conditions: clear sky
  Humidity: 39%
  Wind: 3.09 m/s
  Weather Severity Score: 3/10


2025-08-04 17:45:41,973 - INFO - Fetching weather data for NYC_Brooklyn
2025-08-04 17:45:42,622 - INFO - Weather data saved for NYC_Brooklyn



NYC_Brooklyn:
  Temperature: 85.6°F
  Feels like: 85.8°F
  Conditions: smoke
  Humidity: 44%
  Wind: 4.63 m/s
  Weather Severity Score: 2/10


2025-08-04 17:45:43,629 - INFO - Fetching weather data for NYC_Queens
2025-08-04 17:45:44,264 - INFO - Weather data saved for NYC_Queens



NYC_Queens:
  Temperature: 84.4°F
  Feels like: 84.8°F
  Conditions: smoke
  Humidity: 46%
  Wind: 4.63 m/s
  Weather Severity Score: 2/10


2025-08-04 17:45:45,279 - INFO - Fetching weather data for NYC_Bronx
2025-08-04 17:45:45,963 - INFO - Weather data saved for NYC_Bronx



NYC_Bronx:
  Temperature: 86.3°F
  Feels like: 86.0°F
  Conditions: broken clouds
  Humidity: 41%
  Wind: 3.09 m/s
  Weather Severity Score: 3/10


2025-08-04 17:45:46,972 - INFO - Fetching weather data for NYC_Staten_Island
2025-08-04 17:45:47,613 - INFO - Weather data saved for NYC_Staten_Island



NYC_Staten_Island:
  Temperature: 85.6°F
  Feels like: 86.8°F
  Conditions: scattered clouds
  Humidity: 48%
  Wind: 3.09 m/s
  Weather Severity Score: 2/10


2025-08-04 17:45:48,620 - INFO - Fetching weather data for NYC_Times_Square
2025-08-04 17:45:49,299 - INFO - Weather data saved for NYC_Times_Square



NYC_Times_Square:
  Temperature: 87.3°F
  Feels like: 86.5°F
  Conditions: clear sky
  Humidity: 38%
  Wind: 3.09 m/s
  Weather Severity Score: 3/10


2025-08-04 17:45:50,311 - INFO - Fetching weather data for NYC_Lower_Manhattan
2025-08-04 17:45:51,003 - INFO - Weather data saved for NYC_Lower_Manhattan



NYC_Lower_Manhattan:
  Temperature: 87.0°F
  Feels like: 87.1°F
  Conditions: clear sky
  Humidity: 42%
  Wind: 3.09 m/s
  Weather Severity Score: 3/10

Collected weather data for 7 locations
Data saved to Neon PostgreSQL database
