### Check for columns containing NaN

In [None]:
import pandas as pd
import numpy as np

# Read the residential EMM table from disk
file_path = 'csv/residential_emm.csv'
df = pd.read_csv(file_path)

# One boolean per column: True when that column holds at least one NaN
has_nan = df.isna().any()
columns_with_nan = has_nan[has_nan].index.tolist()
# Variant that also counts empty strings as missing:
# columns_with_nan = df.columns[(df.isna() | (df == '')).any()].tolist()

print("Columns containing NaN values:", columns_with_nan)


### Replace NaN with 0

In [None]:
import pandas as pd
import numpy as np

# Toy input: three numeric columns, each with at least one missing entry
data = {
    'A': [1, 2, np.nan, 4],
    'B': [np.nan, 2, 3, 4],
    'C': [1, np.nan, 3, np.nan]
}

# Build the frame and substitute 0 for every NaN in one chained expression;
# fillna returns a new frame, which is bound to df
df = pd.DataFrame(data).fillna(0)

# Show the result
print(df)


### Check NaN in JSON

In [2]:
import json
import numpy as np


# Walk a nested dict/list structure and report every leaf that is a float NaN
def find_nan_keys(d, parent_key=''):
    """Print the path of each NaN leaf found under ``d``.

    Args:
        d: A dict, a list, or a leaf value. Dicts and lists are descended
            into; anything else is treated as a leaf.
        parent_key: Path accumulated so far. Dict keys are joined with ``.``
            (or used bare when the path is still empty) and list positions
            are appended as ``[i]``.

    Returns:
        None. Matches are written to stdout, one line per NaN leaf.
    """
    # Leaf case first: only float values that are NaN are reported
    if not isinstance(d, (dict, list)):
        if isinstance(d, float) and np.isnan(d):
            print(f"Key with NaN value: {parent_key}")
        return

    # Container case: pair each child with the path that leads to it
    if isinstance(d, dict):
        children = (
            (f"{parent_key}.{name}" if parent_key else name, child)
            for name, child in d.items()
        )
    else:
        children = (
            (f"{parent_key}[{position}]", child)
            for position, child in enumerate(d)
        )

    for path, child in children:
        find_nan_keys(child, path)


# Load the JSON document to scan. The encoding is given explicitly so the file
# is decoded as UTF-8 regardless of the platform's default text encoding.
file_path = 'json/alldecimals/tsv_load_emm_2024.json'
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Print the path of every NaN leaf in the loaded structure
find_nan_keys(data)


### ComStock: Remove rows with '2019-01-01 01:00:00.000'

In [None]:
import os

import pandas as pd

# Load the CSV file
file_path = 'csv/commercial_state.csv'
df = pd.read_csv(file_path)

# Parse 'timestamp_hour' so the comparison below works on datetimes rather than raw text
df['timestamp_hour'] = pd.to_datetime(df['timestamp_hour'])

# Keep every row whose 'timestamp_hour' differs from '2019-01-01 01:00:00.000'
df_filtered = df[df['timestamp_hour'] != '2019-01-01 01:00:00.000']

# Save the filtered DataFrame to a separate CSV file.
# to_csv does not create missing folders, so make sure the target directory exists first.
# Note: the written 'timestamp_hour' values are the parsed datetimes, not the original strings.
output_file_path = 'csv/filtered/commercial_state.csv'
os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
df_filtered.to_csv(output_file_path, index=False)

print(f"Filtered data saved to {output_file_path}")


### JSON: check sums to one and count 8760

In [5]:
import json
import pandas as pd
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', None)     # Show all rows
pd.set_option('display.max_colwidth', None) # Show full width of the columns


def _iter_load_shapes(tree):
    """Yield ``(second_key, third_key, load_shape)`` for each third-level dict holding "load shape".

    Args:
        tree: The parsed JSON object (a dict). Non-dict values at the first,
            second, or third level are skipped.

    Yields:
        Tuples of the second-level key, the third-level key, and the value
        stored under "load shape" in that third-level dict.
    """
    for level1 in tree.values():
        if not isinstance(level1, dict):
            continue
        for key2, level2 in level1.items():
            if not isinstance(level2, dict):
                continue
            for key3, level3 in level2.items():
                if isinstance(level3, dict) and "load shape" in level3:
                    yield key2, key3, level3["load shape"]


# Load the JSON file; decode explicitly as UTF-8 instead of relying on the platform default
file_path = 'json/6decimals/tsv_load_State.json'
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# One row per list under "load shape": its length and its rounded total.
# NOTE(review): rounding to 0 decimals means any total between 0.5 and 1.5 is
# reported as 1.0 - confirm this coarse check is intended.
results = [
    (key2, key3, sub_key, len(sub_value), round(sum(sub_value), 0))
    for key2, key3, load_shape_values in _iter_load_shapes(data)
    for sub_key, sub_value in load_shape_values.items()
]

# Create a DataFrame to display the results
results_df = pd.DataFrame(results, columns=['Second Key', 'Third Key', 'Load Shape Value', 'Count', 'Sum'])
print(results_df)

# # Add a column to sum all the sums for each combination of second key and third key
# results_df['Total Sum'] = results_df.groupby(['Second Key', 'Third Key'])['Sum'].transform('sum')

# import ace_tools as tools; tools.display_dataframe_to_user(name="Load Shape Values with Total Sum", dataframe=results_df)


                  Second Key             Third Key Load Shape Value  Count  \
0                    heating                    MF               AL   8760   
1                    heating                    MF               AR   8760   
2                    heating                    MF               AZ   8760   
3                    heating                    MF               CA   8760   
4                    heating                    MF               CO   8760   
5                    heating                    MF               CT   8760   
6                    heating                    MF               DC   8760   
7                    heating                    MF               DE   8760   
8                    heating                    MF               FL   8760   
9                    heating                    MF               GA   8760   
10                   heating                    MF               IA   8760   
11                   heating                    MF              

### CSV: check sums to one and count 8760

In [None]:
import pandas as pd
# Lift every pandas display limit so the printed tables are never truncated
for _display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

# wname selects both the input file and the column used for grouping
wname = "state"
file_path = f'csv/commercial_{wname}.csv'
df = pd.read_csv(file_path)

# Distinct values present in the two grouping columns
# (unique_emm holds the values of the wname column, whatever wname is set to)
unique_emm = df[wname].unique()
unique_building_type = df['building_type'].unique()
print(f"Unique values for '{wname}':", unique_emm)
print("Unique values for 'building_type':", unique_building_type)

# Number of rows for each (wname, building_type) pair, as a flat table
rows_per_group = df.groupby([wname, 'building_type']).size()
grouped_lengths = rows_per_group.reset_index(name='count')

print(f"\nLength of each combination of {wname} and 'building_type':")
print(grouped_lengths)
