In [1]:
import pandas as pd
import glob
import os

# Get list of CSV files.
# glob.glob returns paths in arbitrary (filesystem-dependent) order; sort them
# so the reference file (csv_files[0]) and the row order of the combined
# dataset are the same on every run and every machine.
csv_files = sorted(glob.glob('../Data/powergen_preds/*.csv'))

print(f"Number of CSV files found: {len(csv_files)}")

Number of CSV files found: 368


In [2]:


# Fail with a clear message instead of an IndexError on csv_files[0] when the
# glob pattern matched nothing.
if not csv_files:
    raise ValueError("No CSV files found to combine; check the input directory")

# Read the first file; its column set is the reference structure that every
# other file is compared against.
frames = [pd.read_csv(csv_files[0])]
first_columns = set(frames[0].columns)

# Read the remaining files, keeping only those whose columns match the reference
for file in csv_files[1:]:
    df = pd.read_csv(file)

    # Files with a different column set are reported and left out of the merge
    if set(df.columns) != first_columns:
        print(f"Warning: {file} has different structure!")
        print(f"Different columns: {set(df.columns).symmetric_difference(first_columns)}")
        continue

    frames.append(df)

# Concatenate once at the end: calling pd.concat inside the loop re-copies the
# accumulated frame on every iteration (quadratic in the number of files).
df_combined = pd.concat(frames, ignore_index=True)

# Convert to datetime if the column exists, and report the covered time span
if 'current_datetime' in df_combined.columns:
    df_combined['current_datetime'] = pd.to_datetime(df_combined['current_datetime'])
    print("\nDatetime range:")
    print(f"Start: {df_combined['current_datetime'].min()}")
    print(f"End: {df_combined['current_datetime'].max()}")

print(f"\nTotal rows in combined dataset: {len(df_combined)}")


Datetime range:
Start: 2025-04-02 08:17:40
End: 2025-04-10 12:00:05

Total rows in combined dataset: 68646


In [4]:
# Destination of the combined dataset; the parent folder is derived from it
output_path = '../src/data/ned-api/appended_NED_preds_2-10-April.csv'
output_dir = os.path.dirname(output_path)

# Make sure the target folder is present (no error if it already exists)
os.makedirs(output_dir, exist_ok=True)

# Write the combined frame without the index column
df_combined.to_csv(output_path, index=False)
print("File saved successfully")

File saved successfully


In [5]:
# Print basic info about the combined dataframe
print("DataFrame Shape:", df_combined.shape)
print("\nFirst few rows:")
print(df_combined.head())
print("\nDataFrame Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df_combined.info()
print("\nSummary Statistics:")
print(df_combined.describe())

DataFrame Shape: (68646, 18)

First few rows:
                            @id        @type           id         point  \
0  /v1/utilizations/68702146339  Utilization  68702146339  /v1/points/0   
1  /v1/utilizations/68702448566  Utilization  68702448566  /v1/points/0   
2  /v1/utilizations/68702752702  Utilization  68702752702  /v1/points/0   
3  /v1/utilizations/68703058767  Utilization  68703058767  /v1/points/0   
4  /v1/utilizations/68703363818  Utilization  68703363818  /v1/points/0   

          type          granularity           granularitytimezone  \
0  /v1/types/1  /v1/granularities/5  /v1/granularity_time_zones/0   
1  /v1/types/1  /v1/granularities/5  /v1/granularity_time_zones/0   
2  /v1/types/1  /v1/granularities/5  /v1/granularity_time_zones/0   
3  /v1/types/1  /v1/granularities/5  /v1/granularity_time_zones/0   
4  /v1/types/1  /v1/granularities/5  /v1/granularity_time_zones/0   

           activity         classification  capacity  volume  percentage  \
0  /v1/activ