In [None]:
import os
import pandas as pd

# Build the path from components so it resolves on any OS; on Windows this
# yields the same '..\\data\\raw' string as before.
data_folder = os.path.join('..', 'data', 'raw')
years = ['2022', '2023', '2024']
months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']


def find_clean_locations(data_folder, years, months):
    """Find the columns that are clean in every monthly CSV that loads.

    Files are looked up as ``<data_folder>/<Month>_<Year>.csv``. Files that
    do not exist or cannot be read are reported with ``print`` and skipped.

    A column is kept only if it passes the check in *every* loaded file, so
    a column that is absent from one of the files is dropped as well.

    Args:
        data_folder: Directory containing the monthly CSV files.
        years: Year strings used to build the file names.
        months: Month names used to build the file names.

    Returns:
        A ``(no_missing, no_zeros)`` tuple of sorted lists of column names.
        ``no_missing`` holds columns without any NaN (``Date`` and ``Hour``
        are eligible); ``no_zeros`` holds columns without any value equal to
        0, with ``Date`` and ``Hour`` excluded. Both are empty when no file
        could be loaded.
    """
    # None means "no file loaded yet"; the first file seeds the running
    # intersection, every later file can only shrink it.
    no_missing = None
    no_zeros = None

    for year in years:
        for month in months:
            filename = f'{month}_{year}.csv'
            file_path = os.path.join(data_folder, filename)
            if not os.path.exists(file_path):
                print(f'{filename} not found in {year}')
                continue

            # Loading is best-effort: one unreadable file must not abort the
            # whole scan, so report it and move on.
            try:
                df = pd.read_csv(file_path)
            except Exception as e:
                print(f"Error loading {filename}: {e}")
                continue

            # Columns without a single NaN in this file.
            complete = set(df.columns[df.notna().all()])

            # Columns without a single 0 in this file. Date and Hour are not
            # measurements, so they are removed before comparing.
            # NOTE: the comparison uses values as parsed by read_csv; NaN and
            # the text "0" both compare as "not zero".
            candidates = df.drop(columns=['Date', 'Hour'], errors='ignore')
            zero_free = set(candidates.columns[(candidates != 0).all()])

            no_missing = complete if no_missing is None else no_missing & complete
            no_zeros = zero_free if no_zeros is None else no_zeros & zero_free

    return sorted(no_missing or ()), sorted(no_zeros or ())


# Columns that satisfy each criterion in every loaded file
locations_with_no_missing_data, locations_with_no_zeros = find_clean_locations(
    data_folder, years, months
)

# Print results
print("Locations with no missing data across all files:")
print(locations_with_no_missing_data)
print("\nLocations with no zero values across all files (excluding Date and Hour):")
print(locations_with_no_zeros)

# Locations meeting both criteria
locations_meeting_both_criteria = sorted(
    set(locations_with_no_missing_data) & set(locations_with_no_zeros)
)
print("\nLocations meeting both criteria:")
print(locations_meeting_both_criteria)

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Build the path from components so it resolves on any OS; on Windows this
# yields the same '..\\data\\raw' string as before.
data_folder = os.path.join('..', 'data', 'raw')
years = ['2022', '2023', '2024']
months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']


def load_monthly_frames(data_folder, years, months):
    """Read every ``<Month>_<Year>.csv`` found in ``data_folder``.

    Missing or unreadable files are reported with ``print`` and skipped.

    Args:
        data_folder: Directory containing the monthly CSV files.
        years: Year strings used to build the file names.
        months: Month names used to build the file names.

    Returns:
        A list of DataFrames, one per file that loaded, in year/month order.
    """
    frames = []
    for year in years:
        for month in months:
            filename = f"{month}_{year}.csv"
            file_path = os.path.join(data_folder, filename)
            if not os.path.exists(file_path):
                print(f"{filename} not found")
                continue
            # Loading is best-effort: report a bad file and keep going.
            try:
                frames.append(pd.read_csv(file_path))
            except Exception as e:
                print(f"Error loading {filename}: {e}")
    return frames


def build_hourly_frame(frames):
    """Combine monthly frames into one frame indexed by timestamp.

    The ``Date`` column is parsed as day/month/year and the ``Hour`` column
    is added to it as a number of hours. Every other column is converted to
    numeric, with unconvertible values becoming NaN.

    Args:
        frames: DataFrames that each carry ``Date`` and ``Hour`` columns.

    Returns:
        A DataFrame sorted by its ``Datetime`` index, or an empty DataFrame
        when there is nothing to combine.
    """
    if not frames:
        return pd.DataFrame()

    # Concatenate once: growing a frame inside the loop re-copies all earlier
    # rows on each iteration and starts from an empty frame.
    combined = pd.concat(frames, ignore_index=True)
    if combined.empty:
        return combined

    timestamps = pd.to_datetime(
        combined['Date'], format='%d/%m/%Y', errors='coerce'
    ) + pd.to_timedelta(combined['Hour'], unit='h')

    # errors='coerce' turns bad dates into NaT without any message; surface
    # the count so silent data loss is visible.
    unparsed = int(timestamps.isna().sum())
    if unparsed:
        print(f"{unparsed} rows have an unparseable Date/Hour and get a NaT index")

    combined['Datetime'] = timestamps
    combined = combined.set_index('Datetime').sort_index()

    # Clean numeric columns (everything except the timestamp parts)
    for column in combined.columns:
        if column not in ('Date', 'Hour', 'Datetime'):
            combined[column] = pd.to_numeric(combined[column], errors='coerce')

    return combined


# Load all datasets and combine them into one time-indexed frame
combined_data = build_hourly_frame(load_monthly_frames(data_folder, years, months))

# Plot for all locations (excluding Date, Hour, and Datetime)
for column in combined_data.columns:
    if column in ('Date', 'Hour', 'Datetime'):
        continue
    fig = plt.figure(figsize=(12, 6))
    plt.plot(combined_data.index, combined_data[column], label=column)
    plt.title(f"Time Series for {column} (Hourly)", fontsize=14)
    plt.xlabel("Datetime", fontsize=12)
    plt.ylabel("Value", fontsize=12)
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()
    # Release the figure so one-figure-per-column does not pile up in memory
    # on backends where show() leaves figures open.
    plt.close(fig)