# Import the necessary modules

In [1]:
import requests
import pandas as pd
import time
from datetime import datetime, timedelta
import json
import os

# get_weather_data():
- The get_weather_data function retrieves daily weather data for Istanbul over any date range you specify (in DD.MM.YYYY format). If you answer “y” at the simulation prompt, it generates realistic synthetic values for each date: temperature (a seasonal trend plus noise), precipitation (an approximate 30% chance per day), and cloud cover (linked to rainfall), with every date formatted as DD.MM.YYYY. Otherwise it calls OpenWeatherMap’s One Call API once per date, using the API key you pass in (or enter when prompted), and extracts the daily temperature, rain volume, and cloud cover; note that in practice this API only provides forecasts about a week ahead. In both cases it collects the results into a list of dictionaries, converts that list into a pandas DataFrame, writes out both CSV and JSON copies to raw_datas/weather_data_istanbul.*, and finally returns the DataFrame for further use.

In [2]:
# Column order shared by the simulated and the API-backed code paths.
_WEATHER_COLUMNS = ['date', 'avg_temp', 'precipitation', 'cloud_cover']


def _simulate_weather(days):
    """Return one synthetic weather record (dict) per datetime in *days*."""
    import numpy as np

    # Istanbul March-April temperatures are approximately between 8-18°C
    temp_min = 8
    temp_max = 18

    records = []
    for day in days:
        day_of_year = day.timetuple().tm_yday

        # Temperature: linear warming trend anchored at day 70 (early March)
        # and spanning roughly 90 days, plus Gaussian day-to-day noise.
        progress = (day_of_year - 70) / 90
        base_temp = temp_min + progress * (temp_max - temp_min)
        temp = round(float(base_temp + np.random.normal(0, 2)), 1)

        # Precipitation: roughly a 30% chance of rain on any given day.
        if np.random.random() < 0.3:
            precipitation = round(float(np.random.exponential(2)), 1)
        else:
            precipitation = 0.0

        # Cloud cover rises with rainfall; clamp to a valid 0-100 percentage
        # because the normal draw can fall below zero or exceed 100.
        raw_cover = int(np.random.normal(50, 30) + precipitation * 5)
        cloud_cover = max(0, min(100, raw_cover))

        records.append({
            'date': day.strftime("%d.%m.%Y"),
            'avg_temp': temp,
            'precipitation': precipitation,
            'cloud_cover': cloud_cover
        })
    return records


def _fetch_weather_from_api(days, lat, lon, api_key):
    """Query OpenWeatherMap once per day in *days*; return de-duplicated records."""
    base_url = "https://api.openweathermap.org/data/2.5/onecall"

    # Built once: the set of dates the caller asked for, in output format.
    wanted_dates = {day.strftime("%d.%m.%Y") for day in days}
    # Each response carries several daily entries, so consecutive requests
    # overlap; remember which dates are already stored to avoid duplicates.
    seen_dates = set()

    records = []
    for day in days:
        params = {
            'lat': lat,
            'lon': lon,
            'exclude': 'current,minutely,hourly,alerts',
            'appid': api_key,
            'units': 'metric',
            'dt': int(day.timestamp())  # Unix timestamp
        }

        try:
            # A timeout keeps one stalled connection from hanging the run.
            response = requests.get(base_url, params=params, timeout=30)
            if response.status_code == 200:
                for entry in response.json().get('daily', []):
                    entry_date = datetime.fromtimestamp(entry['dt']).strftime("%d.%m.%Y")
                    if entry_date not in wanted_dates or entry_date in seen_dates:
                        continue
                    # Build the record before marking the date as seen so a
                    # malformed entry does not block a later, valid one.
                    record = {
                        'date': entry_date,
                        'avg_temp': entry['temp']['day'],
                        'precipitation': entry.get('rain', 0),
                        'cloud_cover': entry['clouds']
                    }
                    seen_dates.add(entry_date)
                    records.append(record)
            else:
                print(f"API Error: {response.status_code} - {response.text}")
        except (requests.RequestException, KeyError, TypeError, ValueError) as e:
            # Best effort: report the failure and continue with the next day.
            print(f"Error: {e}")

        # Wait to avoid exceeding API limits
        time.sleep(1)
    return records


def get_weather_data(start_date, end_date, location="Istanbul", api_key=None,
                     *, use_simulation=None, output_dir='raw_datas'):
    """
    Collect daily weather data for Istanbul between two dates and save it.

    The data is either simulated (synthetic temperature, precipitation and
    cloud cover) or requested from the OpenWeatherMap One Call API, one
    request per day. The result is written to
    ``<output_dir>/weather_data_istanbul.csv`` and ``.json``; the directory
    is created if it does not exist.

    Note: The OpenWeatherMap API normally provides a maximum 7-day forecast, so
    for long date ranges the API path may return data for only a few days.

    Args:
        start_date (str): Start date (in DD.MM.YYYY format)
        end_date (str): End date (in DD.MM.YYYY format), inclusive
        location (str): City name (default: Istanbul). Currently informational
            only; the coordinates used are fixed to Istanbul.
        api_key (str): OpenWeatherMap API key. Prompted for interactively when
            omitted and real API data is requested.
        use_simulation (bool): True for synthetic data, False for the API.
            When None (default) the user is asked interactively.
        output_dir (str): Directory for the CSV/JSON output files.

    Returns:
        DataFrame: One row per collected day with the columns ``date``,
        ``avg_temp``, ``precipitation`` and ``cloud_cover``. Empty (but with
        those columns) when no data was collected.

    Raises:
        ValueError: If a date string does not match the DD.MM.YYYY format.
    """
    # Istanbul coordinates
    lat = 41.0082
    lon = 28.9784

    start = datetime.strptime(start_date, "%d.%m.%Y")
    end = datetime.strptime(end_date, "%d.%m.%Y")

    # Every day from start to end inclusive (empty when end precedes start).
    days = [start + timedelta(days=offset) for offset in range((end - start).days + 1)]

    if use_simulation is None:
        answer = input("Would you like to use simulation data instead of OpenWeatherMap API? (y/n): ")
        use_simulation = answer.strip().lower() == 'y'

    if use_simulation:
        print(f"Creating simulation data ({start_date} - {end_date})...")
        weather_data = _simulate_weather(days)
    else:
        # Only ask for a key when it is actually going to be used.
        if api_key is None:
            api_key = input("Enter your OpenWeatherMap API key: ")
        print(f"Fetching data from OpenWeatherMap API ({start_date} - {end_date})...")
        weather_data = _fetch_weather_from_api(days, lat, lon, api_key)

    # Explicit columns keep the schema stable even when no rows were collected.
    weather_df = pd.DataFrame(weather_data, columns=_WEATHER_COLUMNS)

    os.makedirs(output_dir, exist_ok=True)

    # Save to CSV
    output_file = os.path.join(output_dir, 'weather_data_istanbul.csv')
    weather_df.to_csv(output_file, index=False)
    print(f"Weather data saved to {output_file}.")

    # Save to JSON format as well
    json_file = os.path.join(output_dir, 'weather_data_istanbul.json')
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(weather_data, f, indent=4)

    return weather_df

# Call the function with a start date and an end date to retrieve the weather data for that range.

In [None]:
# Script entry point: collect Istanbul weather data for a fixed date range
# and print a short summary of what was gathered.
if __name__ == "__main__":
    print("Weather Data Collection Tool for Istanbul")
    print("------------------------------------------\n")

    start_date = "11.03.2025"
    end_date = "23.04.2025"

    print(f"Date Range: {start_date} - {end_date}")
    print("Location: Istanbul, Turkey\n")

    # Read the API key from the environment; when it is unset (None),
    # get_weather_data asks the user for it.
    api_key = os.environ.get('OPENWEATHERMAP_API_KEY')

    # Fetch weather data
    df = get_weather_data(start_date, end_date, api_key=api_key)

    if df.empty:
        # With no rows there may be no columns to aggregate either, so skip
        # the statistics instead of failing with a KeyError.
        print("\nNo weather data was collected; nothing to summarize.")
    else:
        # Summary statistics
        print("\nSummary Statistics:")
        print(f"Total number of days: {len(df)}")
        print(f"Average temperature: {df['avg_temp'].mean():.1f}°C")
        print(f"Average cloud cover: {df['cloud_cover'].mean():.1f}%")
        print(f"Total precipitation: {df['precipitation'].sum():.1f} mm")

        print("\nData successfully saved.")