#AQ

In [13]:

# Import Data
import pandas as pd
import arcpy
import os

# 1. Specify File Paths
# One annual-report workbook per year, listed newest first. The paths are built
# from a single base directory so adding a year means editing only
# AQ_REPORT_YEARS.
# The previous literals combined the r"" prefix with escaped "\\" separators,
# which produced paths containing literal double backslashes; the paths below
# use a single backslash between components.
AQ_REPORT_DIR = r"F:\Research and Analysis\Air Quality\Annual Reports DRI"
AQ_REPORT_YEARS = [2023, 2022, 2021, 2020, 2019]

file_paths = [
    AQ_REPORT_DIR + "\\" + f"AQ data {year}.xlsx"
    for year in AQ_REPORT_YEARS
]

# 2. Read the daily sheet from every workbook
# header=[0, 1] tells pandas to use the first two spreadsheet rows as a
# two-level column header, so each column label is a (level 0, level 1) pair.
sheet_name = 'daily'  # Name of the sheet to read
dfs = [
    pd.read_excel(workbook, sheet_name=sheet_name, header=[0, 1])
    for workbook in file_paths
]

# 3. Concatenate DataFrames
# Stack the per-file frames row-wise and renumber the index from 0.
DailyAir_df = pd.concat(dfs, ignore_index=True)

# 4. Reshape to long format and clean
# Every column except ('SITE', 'date') is unpivoted into rows: the two header
# levels become the 'id' and 'variable' columns and the cell becomes 'value'.
df_long = (
    DailyAir_df
    .melt(id_vars=[('SITE', 'date')], var_name=['id', 'variable'], value_name='value')
    # The id column keeps its tuple label after melt; give it a plain name.
    .rename(columns={('SITE', 'date'): 'date'})
    # Store the date as MM/DD/YY text (requires a datetime-typed column).
    .assign(date=lambda frame: frame['date'].dt.strftime('%m/%d/%y'))
)

#rename variable values
# These are the values currently in the variable column in sde
# CO - 8 hr max(ppm)
# NO2 - 1 hr max(ppm)
# O3 - 8 hr max(ppm)
# PM10 - 24 hr max (mg/m3)
# PM 2.5 - 24 hr max(mg/m3)


# Drop the rows for variables that are not carried forward, then rename the
# remaining variable codes to their descriptive labels.
# The result is built as a new frame (.loc + .assign) instead of assigning a
# column onto a boolean-filtered slice, which is the pattern that raises
# pandas' SettingWithCopyWarning.
EXCLUDED_VARIABLES = ['RH', 'BP', 'RWD', 'RWD.1', 'RWS', 'Tmp']

# Source column code -> label written to the 'variable' column.
# Codes that are not listed here are left unchanged by replace().
# Note: 'PM2.5avg' and 'PM2.5avg.1' intentionally share one label, so both
# source columns end up under the same variable name.
# NOTE(review): some labels in the reference comment list earlier in this cell
# are written without a space before the unit (e.g. 'CO - 8 hr max(ppm)');
# confirm which spelling the target table expects.
VARIABLE_LABELS = {
    'COmax': 'CO - 1 hr max (ppm)',
    'max8hrCO': 'CO - 8 hr max (ppm)',
    'NO2_avg': 'NO2 - annual mean (ppm)',
    'NO2max': 'NO2 - 1 hr max (ppm)',
    'O3max': 'O3 - 1 hr max (ppm)',
    'max8hrO3': 'O3 - 8 hr max (ppm)',
    'PM10max': 'PM10 - 24 hr max (mg/m3)',
    'PM10avg': 'PM10 - annual mean (mg/m3)',
    'PM2.5max': 'PM 2.5 - 24 hr max (mg/m3)',
    'PM2.5avg': 'PM 2.5 - annual mean (mg/m3)',
    'PM2.5avg.1': 'PM 2.5 - annual mean (mg/m3)'
}

df_long = (
    df_long
    .loc[~df_long['variable'].isin(EXCLUDED_VARIABLES)]
    .assign(variable=lambda frame: frame['variable'].replace(VARIABLE_LABELS))
)

#FORMAT DATAFRAMES
# Define the path to the scratch geodatabase






In [14]:
# 5. Locations used for staging: the scratch geodatabase and the name of the
#    temporary table that will hold this run's rows.
gdb_path = r"F:\Research and Analysis\Workspace\Sarah\Scratch.gdb"
table_name = "AirQuality_Temp"
output_table = os.path.join(gdb_path, table_name)

# 4. Write the long-format frame to a temporary CSV (without the index column)
#    so it can be imported into the geodatabase.
temp_csv = r"C:\Users\snewsome\Documents\Monitoring data updates\temp_air_quality.csv"
df_long.to_csv(temp_csv, index=False)

# Remove any staging table left over from a previous run before importing.
if arcpy.Exists(output_table):
    arcpy.Delete_management(output_table)

# Import the CSV as a table named `table_name` inside the geodatabase.
arcpy.TableToTable_conversion(in_rows=temp_csv, out_path=gdb_path, out_name=table_name)

# 6. Append to Final Feature Class
final_fc = os.path.join(gdb_path, "AirQuality_Final")

if not arcpy.Exists(final_fc):
    # Create the feature class if it does not exist, using the staged table as
    # the field template. Without a template the new feature class has no
    # attribute fields, and Append with NO_TEST only transfers fields whose
    # names match the target, so every value would be silently dropped.
    arcpy.CreateFeatureclass_management(
        gdb_path, "AirQuality_Final", "POINT", template=output_table
    )

# Append new data. NO_TEST lets the input and target schemas differ; only
# fields with matching names are carried over.
# NOTE(review): the input is a table, so the appended rows are expected to have
# no point geometry, and re-running this cell appends the same rows again --
# confirm both are acceptable for AirQuality_Final.
arcpy.Append_management(output_table, final_fc, "NO_TEST")

print("Data successfully appended to", final_fc)

Data successfully appended to F:\Research and Analysis\Workspace\Sarah\Scratch.gdb\AirQuality_Final
