## Load data into MySQL

In [4]:
import os

import mysql.connector
import numpy as np
import pandas as pd

# Read the cleaned Washington air-quality CSV and preview its first rows
csv_path = 'cleaned_air_quality_washington.csv'
df = pd.read_csv(csv_path)

print(df.head())

  county       city  month  week  weekly_pollutant_mean  first_max_value  \
0  Clark  Vancouver      1     1               5.233598             20.0   
1  Clark  Vancouver      1     2               4.232879             18.0   
2  Clark  Vancouver      1     3              11.234921             37.0   
3  Clark  Vancouver      1     4               2.589792             16.0   
4  Clark  Vancouver      1     5               5.274074             17.0   

   weekly_aqi_avg  local_site_name  year  
0       26.490741                1  2024  
1       21.426530                1  2024  
2       46.460317                1  2024  
3       13.243760                1  2024  
4       26.666667                1  2024  


In [6]:
# Connecting to MySQL
# The password is read from the MYSQL_PASSWORD environment variable so that no
# secret is stored in the notebook. Host, user and database can be overridden
# the same way and otherwise fall back to the values this notebook was written
# against.
# NOTE(review): a password was previously committed in this cell; it should be
# rotated on the server.
db_password = os.environ.get('MYSQL_PASSWORD')
if db_password is None:
    raise RuntimeError(
        "Set the MYSQL_PASSWORD environment variable before running this cell."
    )

mysql_con = mysql.connector.connect(
    host=os.environ.get('MYSQL_HOST', 'localhost'),
    user=os.environ.get('MYSQL_USER', 'aqi_user'),
    password=db_password,
    database=os.environ.get('MYSQL_DATABASE', 'ev_project_final'),
)

In [8]:
# Open a cursor on the MySQL connection; the DROP, CREATE and INSERT
# statements in the following cells are all executed through it
cursor = mysql_con.cursor()

In [14]:
# Normalise column dtypes before loading: missing values are replaced with a
# type-appropriate default (0, 0.0 or '') and every column ends up as a plain
# NumPy int64 / float64 column or a column of Python strings.
con_df = df.copy()  # Avoid modifying original DataFrame
for col in con_df.columns:
    dtype = con_df[col].dtype
    if pd.api.types.is_integer_dtype(dtype):  # NumPy ints and pandas nullable Int*/UInt* dtypes
        # Fill first so nullable columns containing <NA> can be cast, then cast
        # to int64. The previous int32 cast could silently narrow 64-bit values,
        # and its errors='ignore' hid any failure; the element-wise int() that
        # followed had no effect because pandas re-infers the column as int64.
        con_df[col] = con_df[col].fillna(0).astype('int64')
    elif pd.api.types.is_float_dtype(dtype):  # float32, float64, nullable Float*
        # Same reasoning: an element-wise float() leaves the dtype float64,
        # so a single vectorised cast is equivalent.
        con_df[col] = con_df[col].fillna(0.0).astype('float64')
    elif pd.api.types.is_string_dtype(dtype) or pd.api.types.is_object_dtype(dtype):
        # astype(str) renders NaN as the text 'nan'; map that back to an empty string
        con_df[col] = con_df[col].astype(str).replace('nan', '')
    # Add other type conversions if needed (e.g., datetime)


In [20]:
# Drop any existing ev_analysis table so the CREATE TABLE below does not fail
# when the notebook is re-run; IF EXISTS makes this a no-op when the table is absent
cursor.execute("DROP TABLE IF EXISTS ev_analysis")

In [22]:
# Build one MySQL column definition per DataFrame column, choosing the SQL type
# from the pandas dtype. This cell only prepares the definitions; the CREATE
# TABLE statement itself is executed in a later cell.
columns = con_df.columns
column_dtypes = []
for col in columns:
    dtype = con_df[col].dtype
    # A backtick inside a quoted identifier must be doubled, otherwise a column
    # name containing one would end the identifier early and break the statement.
    quoted_col = "`" + str(col).replace("`", "``") + "`"
    if pd.api.types.is_integer_dtype(dtype):
        # MySQL INT is a signed 32-bit type; fall back to BIGINT when the data
        # does not fit so large values are not rejected or clipped on insert.
        present = con_df[col].dropna()
        needs_bigint = len(present) > 0 and (
            present.min() < -2**31 or present.max() > 2**31 - 1
        )
        column_dtypes.append(f"{quoted_col} {'BIGINT' if needs_bigint else 'INT'}")
    elif pd.api.types.is_float_dtype(dtype):
        # MySQL FLOAT is single precision and would round 64-bit pandas floats;
        # DOUBLE stores them without loss.
        column_dtypes.append(f"{quoted_col} DOUBLE")
    else:
        column_dtypes.append(f"{quoted_col} VARCHAR(255)")


In [26]:
# Assemble the CREATE TABLE statement from the column definitions collected in
# column_dtypes and execute it. There is no IF NOT EXISTS clause, so this relies
# on the table having been dropped beforehand.
create_table = f"CREATE TABLE ev_analysis ({', '.join(column_dtypes)})"
cursor.execute(create_table)

In [28]:
# Insert data in the created table
def _as_python_scalar(value):
    """Convert one cell value to a built-in int, float or str for parameter binding.

    Integer-typed values become ``int``, float-typed values become ``float`` and
    everything else is passed through ``str``.
    """
    value_type = type(value)
    if pd.api.types.is_integer_dtype(value_type):
        return int(value)
    if pd.api.types.is_float_dtype(value_type):
        return float(value)
    return str(value)


# Rows sent per executemany() call; chunking keeps each generated statement small.
INSERT_BATCH_SIZE = 1000

# Column names are backtick-quoted with embedded backticks doubled; the values
# are bound through %s placeholders so the connector handles their escaping.
quoted_columns = ', '.join("`" + str(col).replace("`", "``") + "`" for col in columns)
placeholders = ', '.join(['%s'] * len(columns))
insert_data = f"INSERT INTO ev_analysis ({quoted_columns}) VALUES ({placeholders})"

# name=None yields plain tuples in column order instead of namedtuples.
rows = [
    tuple(_as_python_scalar(value) for value in record)
    for record in con_df.itertuples(index=False, name=None)
]

# executemany() sends each chunk in one call instead of one execute() round trip
# per row; an empty DataFrame produces no chunks and therefore no statement.
for start in range(0, len(rows), INSERT_BATCH_SIZE):
    cursor.executemany(insert_data, rows[start:start + INSERT_BATCH_SIZE])

In [32]:
# Commit the open transaction on the connection so the inserted rows are persisted
mysql_con.commit()

In [38]:
# Report completion; this prints a fixed message and does not itself verify the load
print("Cleaned data loaded into MySQL table 'ev_analysis'.")

Cleaned data loaded into MySQL table 'ev_analysis'.


In [36]:
# Release database resources: close the cursor first, then the connection
# it was created from
cursor.close()
mysql_con.close()