In [34]:
import pandas as pd
from shapely.geometry import Point
from sqlalchemy import create_engine, text, Table, Column, Integer, String, MetaData, Sequence, SmallInteger
from sqlalchemy.exc import ProgrammingError
import numpy as np
import configparser

In [35]:
config = configparser.ConfigParser()
config.read("database_settings.ini")
ini = config["default"]

In [36]:
engine = create_engine(f"postgresql://{ini['db_user']}:{ini['db_password']}@{ini['db_host']}:{ini['db_port']}/{ini['db_name']}")

In [37]:
conn = engine.connect()

In [38]:
try:
    conn.execute(text("CREATE EXTENSION postgis"))
    conn.execute(text("CREATE EXTENSION postgis_topology"))
    conn.commit()
except ProgrammingError as e:
    conn.rollback()
    print(e)


(psycopg2.errors.DuplicateObject) extension "postgis" already exists

[SQL: CREATE EXTENSION postgis]
(Background on this error at: https://sqlalche.me/e/20/f405)


In [39]:
try:
    result = conn.execute(text("SELECT postgis_full_version()"))
    conn.commit()
    print(result.fetchall())
except ProgrammingError as e:
    conn.rollback()
    print(e)

[('POSTGIS="3.3.2 POSTGIS_REVISION" [EXTENSION] (liblwgeom version mismatch: "3.3.2 4975da8") PGSQL="150" GEOS="3.11.2-CAPI-1.17.2" PROJ="9.2.0" LIBXML= ... (26 characters truncated) ... BPROTOBUF="1.4.1" WAGYU="0.5.0 (Internal)" (core procs from "3.3.2 4975da8" need upgrade) TOPOLOGY (topology procs from "3.3.2 4975da8" need upgrade)',)]


In [40]:
try:
    conn.execute(text(f"DROP SEQUENCE {ini['db_schema']}.id_seq"))
    
    conn.commit()
    print(result.fetchall())
except ProgrammingError as e:
    conn.rollback()
    print(e)

[]


In [41]:
try:
    conn.execute(text(f"DROP TABLE {ini['db_schema']}.{ini['table_name']}"))
    conn.commit()
    print(result.fetchall())
except ProgrammingError as e:
    conn.rollback()
    print(e)

[]


In [42]:
meta = MetaData()

db_table = Table(
    ini["table_name"], meta,
    Column("id", Integer, Sequence("id_seq", start=1), primary_key=True),
    Column("age", SmallInteger),
    Column("gender", String(6)),
    Column("marital_status", String(17)),
    Column("occupation", String(255)),
    Column("monthly_income", String(255)),
    Column("education", String(255)),
    Column("family_size", SmallInteger),
    Column("pin_code", SmallInteger),
    Column("medium_p1", String(255)),
    Column("medium_p2", String(255)),
    Column("meal_p1", String(255)),
    Column("meal_p2", String(255)),
    Column("preference_p1", String(255)),
    Column("preference_p2", String(255)),
    Column("ease_and_convenience", String(17)),
    Column("time_saving", String(17)),
    Column("more_restaurant_choices", String(17)),
    Column("easy_payment_option", String(17)),
    Column("more_offers_and_discounts", String(17)),
    Column("good_food_quality", String(17)),
    Column("good_tracking_system", String(17)),
    Column("self_cooking", String(17)),
    Column("health_concern", String(17)),
    Column("late_delivery", String(17)),
    Column("poor_hygiene", String(17)),
    Column("bad_past_experience", String(17)),
    Column("unavailable", String(17)),
    Column("unaffordable", String(17)),
    Column("long_delivery_time", String(17)),
    Column("delivery_assignment_delay", String(17)),
    Column("delivery_pickup_delay", String(17)),
    Column("wrong_order", String(17)),
    Column("missing_item", String(17)),
    Column("ordered_by_mistake", String(17)),
    Column("time_influence", String(5)),
    Column("order_time", String(255)),
    Column("max_wait_time", String(255)),
    Column("busy_location_residence", String(17)),
    Column("google_maps_accuracy", String(17)),
    Column("good_road_condition", String(17)),
    Column("low_quantity_low_time", String(17)),
    Column("deliver_person_availability", String(17)),
    Column("rating_influence", String(5)),
    Column("less_delivery_time", String(20)),
    Column("high_package_quality", String(20)),
    Column("call_number", String(20)),
    Column("politeness", String(20)),
    Column("freshness", String(20)),
    Column("temperature", String(20)),
    Column("good_taste", String(20)),
    Column("good_quantity", String(20)),
    Column("output", String(3)),
    Column("review", String(255))
)

db_table.create(engine)

In [43]:
dataset = pd.read_csv(ini["path_to_csv"])

In [44]:
dataset["Reviews"] = np.where(dataset['Reviews'].str.contains("nil", case=False), np.nan, dataset['Reviews'])

In [45]:
dataset['point'] = dataset.apply(lambda row: Point(row['longitude'], row['latitude']), axis=1)

In [46]:
dataset['geom'] = dataset['point'].apply(lambda point: point.wkt)
dataset.drop(["latitude", "longitude", "point"], axis=1, inplace=True)

In [47]:
dataset.to_sql(ini["table_name"], conn, if_exists='replace')

388

In [48]:
conn.close()