
## Create Table in Database

In [15]:
import getpass
import os
import warnings

import pandas as pd
import psycopg2
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

# Install a blanket "ignore" filter: every warning raised after this cell runs is suppressed.
# NOTE(review): presumably meant to keep cell output uncluttered, but it also hides
# deprecation warnings from pandas/SQLAlchemy — consider narrowing the filter.
warnings.filterwarnings("ignore")

In [None]:
# Connection settings for the PostgreSQL database.
# The tunnel to the database must already be open: the server is reached
# through the local forwarded port below.
host = 'localhost'
port = '5433'
database = 'layereddb'
schema = 'berlin_source_data'

# Credentials are never hard-coded in the notebook. They come from the standard
# libpq environment variables when set, otherwise the user is prompted
# (getpass keeps the password out of the cell output).
user = os.environ.get("PGUSER") or input("PostgreSQL user: ")
password = os.environ.get("PGPASSWORD") or getpass.getpass("PostgreSQL password: ")

# URL.create escapes special characters (e.g. '@', '/', ':') in the user name
# and password, which a hand-built f-string URL would not.
engine = create_engine(
    URL.create(
        "postgresql+psycopg2",
        username=user,
        password=password,
        host=host,
        port=int(port),
        database=database,
    )
)

In [None]:
# DDL for the veterinary clinics table; IF NOT EXISTS makes re-running this cell a no-op
# once the table is there. The target schema name is interpolated from `schema`.
creating_vet_clinics_table = f""" CREATE TABLE IF NOT EXISTS {schema}.veterinary_clinics 

(
  clinic_id           VARCHAR(64) PRIMARY KEY,              -- e.g. 'node/123' or your stringified OSM id
  clinic_name         VARCHAR(200) NOT NULL,
  street              VARCHAR(100) NOT NULL,
  house_number        VARCHAR(10)  NOT NULL,
  postcode            VARCHAR(5),
  city                VARCHAR(100) NOT NULL,
  district_id         VARCHAR(100) REFERENCES {schema}.districts(district_id) ON UPDATE CASCADE ON DELETE SET NULL,
  neighbourhood_id    VARCHAR(100),
  phone_number        VARCHAR(50),
  website             VARCHAR(100),
  email               VARCHAR(100),
  opening_hours       VARCHAR(255),
  operator            VARCHAR(100),
  speciality          VARCHAR(100),
  wheelchair_acces    VARCHAR(50),                          -- normalized: yes|no|limited
  full_address        VARCHAR(255),
  -- geometries (SRID 4326)
  pt_geom             geometry(Point, 4326),
  footprint_geom      geometry(Geometry, 4326)            -- Polygon/MultiPolygon if you have it

);
"""

# Execute the DDL in a transaction block: engine.begin() commits when the block
# exits normally and rolls back if the statement raises.
ddl_statement = text(creating_vet_clinics_table)
with engine.begin() as db_conn:
    db_conn.execute(ddl_statement)

In [None]:
# --- CONFIG ---
CSV_PATH   = "../sources/vets_with_districts_neighborhoods.csv"
SCHEMA     = "berlin_source_data"
TABLE_NAME = "veterinary_clinics"

# --- LOAD & INSERT IN CHUNKS ---
chunksize = 50_000  # rows read from the CSV (and sent to the database) per batch; tune as needed
total_rows = 0

# All chunks are written inside ONE transaction: engine.begin() commits only if
# every chunk succeeds and rolls everything back on the first error, so a failed
# load never leaves the table partially populated.
with engine.begin() as conn:
    # dtype=str: every column of the target table is VARCHAR or geometry, so the CSV
    # is read as text. Letting pandas infer numeric dtypes would turn a postcode
    # column with missing values into floats ("10115.0", too long for VARCHAR(5))
    # and drop leading zeros from phone numbers. Missing values remain NaN and are
    # stored as NULL.
    # NOTE(review): assumes the CSV column names match the table columns — verify.
    for i, chunk in enumerate(pd.read_csv(CSV_PATH, chunksize=chunksize, dtype=str)):
        chunk.to_sql(
            TABLE_NAME,
            conn,               # reuse the open transaction instead of one commit per chunk
            schema=SCHEMA,
            if_exists="append",
            index=False,
            method="multi",     # batches multi-row INSERTs
        )
        total_rows += len(chunk)
        print(f"Inserted chunk {i+1}: {len(chunk):,} rows (total {total_rows:,})")

print(f"✅ Done. Inserted ~{total_rows:,} rows into {SCHEMA}.{TABLE_NAME}")

# --- VERIFY ---
# Count the rows now in the table (includes any rows that were there before this load).
check = pd.read_sql(
    f"SELECT COUNT(*) AS rows FROM {SCHEMA}.{TABLE_NAME};", engine
)
print(check)
