# ─────────────────────────── PUSH ALL DATA TO DATABASE ─────────────────────────

In [6]:
"""
Upload processed CSV files into PostgreSQL tables.

This script will:
1. Load database connection parameters from a .env file.
2. Create a SQLAlchemy engine.
3. Iterate over every CSV one folder level below ../data/processed.
4. Read each CSV as text-only into a pandas DataFrame, using the name of
   the CSV's parent folder as the column delimiter.
5. Map every column to SQL TEXT type.
6. Replace (or create) the corresponding table in the target schema.
"""

from pathlib import Path
from urllib.parse import quote

from dotenv import dotenv_values
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.types import Text

## ───────────────────────────── LOAD CONFIG ─────────────────────────────

In [7]:
# Read .env into a dict; a key written without a value maps to None
config    = dotenv_values()

# Fail early with one message naming every absent setting, instead of a
# KeyError for only the first one or a None leaking into the connection URL
# as the literal text "None".
_required_keys = (
    'POSTGRES_USER',
    'POSTGRES_PASS',
    'POSTGRES_HOST',
    'POSTGRES_PORT',
    'POSTGRES_DB',
    'POSTGRES_SCHEMA',
)
_missing_keys = [key for key in _required_keys if config.get(key) is None]
if _missing_keys:
    raise KeyError(f"Missing settings in .env: {', '.join(_missing_keys)}")

# Extract Postgres credentials / connection info
pg_user   = config['POSTGRES_USER']
pg_pass   = config['POSTGRES_PASS']
pg_host   = config['POSTGRES_HOST']
pg_port   = config['POSTGRES_PORT']
pg_db     = config['POSTGRES_DB']
pg_schema = config['POSTGRES_SCHEMA']

In [8]:
# Build the SQLAlchemy database URL.
# User name and password are percent-encoded (safe="" also encodes "/"):
# characters such as "@", ":" or "/" in a raw credential would otherwise be
# read as URL syntax and break the host/port/database parsing.
db_url = (
    f"postgresql://{quote(pg_user, safe='')}:{quote(pg_pass, safe='')}"
    f"@{pg_host}:{pg_port}/{pg_db}"
)

# This engine will manage connections & SQL execution
engine = create_engine(db_url)

## ─────────────────────────── PROCESS CSV FILES ─────────────────────────

In [9]:
# Root folder of the processed CSV files, relative to the working directory
data_dir = Path("..") / "data" / "processed"

In [10]:
# Walk the ";" and "," subfolders: each subfolder is named after the
# delimiter used by the CSV files inside it.
# sorted() makes the upload order reproducible; glob() alone yields paths in
# arbitrary order.
for csv_path in sorted(data_dir.glob("*/*.csv")):
    # determine delimiter from the parent folder name
    sep = csv_path.parent.name
    if sep not in (";", ","):
        # With engine="python" a multi-character sep is interpreted as a
        # regular expression, so a file in an unrelated folder would be
        # mis-parsed and its table replaced with wrong data. Skip it instead.
        print(f"Skipped: {csv_path} (folder {sep!r} is not a known delimiter)")
        continue

    # derive table name from filename
    # NOTE(review): only "-" is replaced; any other character in the file
    # stem (e.g. ".") is carried into the table name unchanged.
    table_name = csv_path.stem.lower().replace("-", "_")

    # read CSV as text, with the correct delimiter;
    # malformed rows are reported as warnings rather than aborting the load
    df = pd.read_csv(
        csv_path,
        dtype=str,
        sep=sep,
        engine="python",
        on_bad_lines="warn",
        encoding="utf-8"
    )

    # map every column to TEXT in Postgres
    dtype_dict = {col: Text() for col in df.columns}

    # write into the target schema; "replace" drops an existing table of the
    # same name before inserting
    df.to_sql(
        name      = table_name,
        con       = engine,
        schema    = pg_schema,
        if_exists = "replace",
        index     = False,
        dtype     = dtype_dict
    )

    print(f"Uploaded: {pg_schema}.{table_name}")

Uploaded: capstone_jan_artur._handelsnamen_pkw
Uploaded: capstone_jan_artur.fz_08.7_raw
Uploaded: capstone_jan_artur._modellreihen
Uploaded: capstone_jan_artur.fz_08.6_raw
Uploaded: capstone_jan_artur.fz_08.3_raw
Uploaded: capstone_jan_artur.fz_10.1_raw
Uploaded: capstone_jan_artur.fz_08.2_raw
Uploaded: capstone_jan_artur.fz_08.16_raw
Uploaded: capstone_jan_artur.fz_08.9_raw
Uploaded: capstone_jan_artur.fz_08.8_raw
Uploaded: capstone_jan_artur._eu_recall_2025_06_09
Uploaded: capstone_jan_artur._ladesaeulenregister_2025_05_07
Uploaded: capstone_jan_artur._rueckruf_2025_06_09
