In [None]:
import psycopg2
import json
import os

### Connecting to the database

In [None]:
# Read the database connection settings from the JSON file that sits next to
# the notebook; the parsed mapping is consumed by the connection cell below.
with open("credentials.json", encoding="utf-8") as credentials_file:
    credentials = json.loads(credentials_file.read())

In [None]:
# Unpack the individual connection settings from the loaded credentials.
# A missing key raises KeyError before any connection attempt is made.
db_host, db_name, db_user, db_password, db_port = (
    credentials["db_host"],
    credentials["db_name"],
    credentials["db_user"],
    credentials["db_password"],
    credentials["db_port"],
)

# Open the PostgreSQL connection that the following cells reuse.
conn = psycopg2.connect(
    host=db_host, dbname=db_name, user=db_user, password=db_password, port=db_port
)

In [None]:
# Cursor reused by the later data-loading cell.
cur = conn.cursor()

# DDL for the raw leukemia dataset table. The Yes/No flag columns and
# leukemia_status are restricted to their allowed values via CHECK constraints.
create_table_sql = """CREATE TABLE leukemia_raw_data (
            patient_id SERIAL PRIMARY KEY,
            age INTEGER NOT NULL,
            gender VARCHAR(10) NOT NULL,
            country VARCHAR(100) NOT NULL,
            wbc_count INTEGER NOT NULL,
            rbc_count NUMERIC(5,2) NOT NULL,
            platelet_count INTEGER NOT NULL,
            hemoglobin_level NUMERIC(5,2) NOT NULL,
            bone_marrow_blasts INTEGER NOT NULL,
            genetic_mutation VARCHAR(3) CHECK (genetic_mutation IN ('Yes', 'No')),
            family_history VARCHAR(3) CHECK (family_history IN ('Yes', 'No')),
            smoking_status VARCHAR(50),
            alcohol_consumption VARCHAR(3) CHECK (alcohol_consumption IN ('Yes', 'No')),
            radiation_exposure VARCHAR(3) CHECK (radiation_exposure IN ('Yes', 'No')),
            infection_history VARCHAR(3) CHECK (infection_history IN ('Yes', 'No')),
            bmi NUMERIC(5,2) NOT NULL,
            chronic_illness VARCHAR(3) CHECK (chronic_illness IN ('Yes', 'No')),
            immune_disorders VARCHAR(3) CHECK (immune_disorders IN ('Yes', 'No')),
            ethnicity VARCHAR(50),
            socioeconomic_status VARCHAR(50),
            urban_rural VARCHAR(50),
            leukemia_status VARCHAR(10) CHECK (leukemia_status IN ('Positive', 'Negative'))
        );
"""

# Create the table and persist the schema change.
cur.execute(create_table_sql)
conn.commit()

In [None]:
# Step up one directory so that paths built from os.getcwd() afterwards are
# relative to the parent folder, then show the resulting working directory.
# NOTE: the move is relative, so re-running this cell climbs one more level.
os.chdir(os.pardir)
print(os.getcwd())

In [None]:
# Bulk-load the CSV (located under <current working directory>/data) into the
# raw-data table using PostgreSQL's COPY ... FROM stdin.
csv_file_path = os.path.join(os.getcwd(), "data", "biased_leukemia_dataset.csv")

table_name = 'leukemia_raw_data'

# table_name is a hard-coded constant (not external input), so interpolating
# it into the statement is acceptable here.
copy_sql = f"""
           COPY {table_name} FROM stdin 
           DELIMITER as ','
           CSV HEADER
           """

try:
    # Explicit encoding so the load does not depend on the platform default,
    # matching how credentials.json is opened.
    # NOTE(review): assumes the CSV is UTF-8 encoded — confirm.
    with open(csv_file_path, 'r', encoding="utf-8") as f:
        cur.copy_expert(sql=copy_sql, file=f)

    # Persist the loaded rows only if the whole COPY succeeded.
    conn.commit()
finally:
    # Always release the cursor and connection, even when the file is missing
    # or COPY fails; closing without a commit discards the pending changes, so
    # a failed load leaves nothing half-written.
    cur.close()
    conn.close()