# 1 - Connect to Database

In [15]:
import sqlite3

import pandas as pd
from sqlalchemy import create_engine, text

In [16]:
# Engine bound to the local SQLite file (statement echoing disabled)
db = create_engine('sqlite:///db_olist.sqlite', echo=False)
# Open the shared connection used by later cells
conn = db.connect()

# 2 - Loading dataset

In [5]:
# customers dataset (CSV -> DataFrame)
df_costumer = pd.read_csv(filepath_or_buffer='database/olist_customers_dataset.csv')

In [7]:
# NOTE(review): this DDL is disabled and kept for reference only. With it commented
# out, the `costumer` table is presumably created by the later `to_sql` call using
# pandas-inferred column types — confirm that is intended.
# schema_costumer = """
#     CREATE TABLE costumer(
#         customer_id                 TEXT,
#         customer_unique_id          TEXT,
#         customer_zip_code_prefix    INTEGER,
#         customer_city               TEXT,
#         customer_state              TEXT
#     )
# """

# # create schema
# conn.execute(schema_costumer)

In [None]:
# Ask SQLite's catalog for the name of every table defined so far
query = """
    SELECT name
    FROM sqlite_master
    WHERE type = 'table'
"""

table = pd.read_sql_query(sql=query, con=conn)
table

In [None]:
# Load the customers into the `costumer` table.
# `replace` drops any previous copy of the table before writing, so re-running
# this cell no longer appends a second, duplicated set of rows (which is what
# `append` did on every re-run against the same database file).
df_costumer.to_sql('costumer', con=conn, if_exists='replace', index=False)

In [None]:
# Preview of the customers table.
# The row cap is applied in SQL so only five rows are read, instead of pulling
# the whole table into pandas just to display its head.
query_select = """
    SELECT * FROM costumer
    LIMIT 5
"""

table = pd.read_sql_query(query_select, conn)
table

# 3 - Loading Other Datasets

### Geolocation

In [16]:
# Geolocation CSV -> DataFrame
df_geolocation = pd.read_csv(filepath_or_buffer='database/olist_geolocation_dataset.csv')
# Inferred column dtypes, for comparison with the table schema declared in the next cell
df_geolocation.dtypes

geolocation_zip_code_prefix      int64
geolocation_lat                float64
geolocation_lng                float64
geolocation_city                object
geolocation_state               object
dtype: object

In [34]:
schema_geolocation = """
    CREATE TABLE geolocation(
        geolocation_zip_code_prefix      INTEGER,
        geolocation_lat                  REAL,
        geolocation_lng                  REAL,
        geolocation_city                 TEXT,
        geolocation_state                TEXT
    )
"""

# Rebuild and load the table on a dedicated connection:
#  - DROP TABLE IF EXISTS makes the cell re-runnable (a bare CREATE TABLE raises
#    as soon as the table already exists in the database file);
#  - text() is required because SQLAlchemy 2.x no longer executes plain strings;
#  - db.begin() commits the work when the block exits without an error.
with db.begin() as tx:
    tx.execute(text("DROP TABLE IF EXISTS geolocation"))
    # create schema
    tx.execute(text(schema_geolocation))
    # insert data into table
    geolocation_rows = df_geolocation.to_sql('geolocation', con=tx, if_exists='append', index=False)

# number of rows written (displayed as the cell output)
geolocation_rows

1000163

### Order Items

In [17]:
# order items dataset (CSV -> DataFrame)
df_order_items = pd.read_csv(filepath_or_buffer='database/olist_order_items_dataset.csv')
# Inferred column dtypes, for comparison with the table schema declared in the next cell
df_order_items.dtypes

order_id                object
order_item_id            int64
product_id              object
seller_id               object
shipping_limit_date     object
price                  float64
freight_value          float64
dtype: object

In [18]:
schema_order_items = """
    CREATE TABLE order_items(
        order_id               TEXT,
        order_item_id          INTEGER,
        product_id             TEXT,
        seller_id              TEXT,
        shipping_limit_date    TEXT,
        price                  REAL,
        freight_value          REAL
    )
"""

# Rebuild and load the table on a dedicated connection:
#  - DROP TABLE IF EXISTS makes the cell re-runnable (a bare CREATE TABLE raises
#    as soon as the table already exists in the database file);
#  - text() is required because SQLAlchemy 2.x no longer executes plain strings;
#  - db.begin() commits the work when the block exits without an error.
with db.begin() as tx:
    tx.execute(text("DROP TABLE IF EXISTS order_items"))
    # create schema
    tx.execute(text(schema_order_items))
    # insert data into table
    order_items_rows = df_order_items.to_sql('order_items', con=tx, if_exists='append', index=False)

# number of rows written (displayed as the cell output)
order_items_rows

112650

### Order Payments

In [20]:
# Order payments CSV -> DataFrame
df_order_payments = pd.read_csv(filepath_or_buffer='database/olist_order_payments_dataset.csv')
# Inferred column dtypes, for comparison with the table schema declared in the next cell
df_order_payments.dtypes

order_id                 object
payment_sequential        int64
payment_type             object
payment_installments      int64
payment_value           float64
dtype: object

In [21]:
schema_order_payments = """
    CREATE TABLE order_payments(
        order_id                 TEXT,
        payment_sequential       INTEGER,
        payment_type             TEXT,
        payment_installments     INTEGER,
        payment_value            REAL
    )
"""

# Rebuild and load the table on a dedicated connection:
#  - DROP TABLE IF EXISTS makes the cell re-runnable (a bare CREATE TABLE raises
#    as soon as the table already exists in the database file);
#  - text() is required because SQLAlchemy 2.x no longer executes plain strings;
#  - db.begin() commits the work when the block exits without an error.
with db.begin() as tx:
    tx.execute(text("DROP TABLE IF EXISTS order_payments"))
    # create schema
    tx.execute(text(schema_order_payments))
    # insert data into table
    order_payments_rows = df_order_payments.to_sql('order_payments', con=tx, if_exists='append', index=False)

# number of rows written (displayed as the cell output)
order_payments_rows

103886

### Order Reviews

In [23]:
# Order reviews CSV -> DataFrame
df_order_reviews = pd.read_csv(filepath_or_buffer='database/olist_order_reviews_dataset.csv')
# Inferred column dtypes, for comparison with the table schema declared in the next cell
df_order_reviews.dtypes

review_id                  object
order_id                   object
review_score                int64
review_comment_title       object
review_comment_message     object
review_creation_date       object
review_answer_timestamp    object
dtype: object

In [24]:
schema_order_reviews = """
    CREATE TABLE order_reviews(
        review_id                  TEXT,
        order_id                   TEXT,
        review_score               INTEGER,
        review_comment_title       TEXT,
        review_comment_message     TEXT,
        review_creation_date       TEXT,
        review_answer_timestamp    TEXT
    )
"""

# Rebuild and load the table on a dedicated connection:
#  - DROP TABLE IF EXISTS makes the cell re-runnable (a bare CREATE TABLE raises
#    as soon as the table already exists in the database file);
#  - text() is required because SQLAlchemy 2.x no longer executes plain strings;
#  - db.begin() commits the work when the block exits without an error.
with db.begin() as tx:
    tx.execute(text("DROP TABLE IF EXISTS order_reviews"))
    # create schema
    tx.execute(text(schema_order_reviews))
    # insert data into table
    order_reviews_rows = df_order_reviews.to_sql('order_reviews', con=tx, if_exists='append', index=False)

# number of rows written (displayed as the cell output)
order_reviews_rows

99224

### Orders

In [25]:
# Orders CSV -> DataFrame
df_orders = pd.read_csv(filepath_or_buffer='database/olist_orders_dataset.csv')
# Inferred column dtypes, for comparison with the table schema declared in the next cell
df_orders.dtypes

order_id                         object
customer_id                      object
order_status                     object
order_purchase_timestamp         object
order_approved_at                object
order_delivered_carrier_date     object
order_delivered_customer_date    object
order_estimated_delivery_date    object
dtype: object

In [26]:
schema_orders = """
    CREATE TABLE orders(
        order_id                         TEXT,
        customer_id                      TEXT,
        order_status                     TEXT,
        order_purchase_timestamp         TEXT,
        order_approved_at                TEXT,
        order_delivered_carrier_date     TEXT,
        order_delivered_customer_date    TEXT,
        order_estimated_delivery_date    TEXT
    )
"""

# Rebuild and load the table on a dedicated connection:
#  - DROP TABLE IF EXISTS makes the cell re-runnable (a bare CREATE TABLE raises
#    as soon as the table already exists in the database file);
#  - text() is required because SQLAlchemy 2.x no longer executes plain strings;
#  - db.begin() commits the work when the block exits without an error.
with db.begin() as tx:
    tx.execute(text("DROP TABLE IF EXISTS orders"))
    # create schema
    tx.execute(text(schema_orders))
    # insert data into table
    orders_rows = df_orders.to_sql('orders', con=tx, if_exists='append', index=False)

# number of rows written (displayed as the cell output)
orders_rows

99441

### Products

In [27]:
# Products CSV -> DataFrame
df_products = pd.read_csv(filepath_or_buffer='database/olist_products_dataset.csv')
# Inferred column dtypes, for comparison with the table schema declared in the next cell
df_products.dtypes

product_id                     object
product_category_name          object
product_name_lenght           float64
product_description_lenght    float64
product_photos_qty            float64
product_weight_g              float64
product_length_cm             float64
product_height_cm             float64
product_width_cm              float64
dtype: object

In [28]:
schema_products = """
    CREATE TABLE products(
        product_id                    TEXT,
        product_category_name         TEXT,
        product_name_lenght           INTEGER,
        product_description_lenght    INTEGER,
        product_photos_qty            INTEGER,
        product_weight_g              INTEGER,
        product_length_cm             INTEGER,
        product_height_cm             INTEGER,
        product_width_cm              INTEGER
    )
"""

# Rebuild and load the table on a dedicated connection:
#  - DROP TABLE IF EXISTS makes the cell re-runnable (a bare CREATE TABLE raises
#    as soon as the table already exists in the database file);
#  - text() is required because SQLAlchemy 2.x no longer executes plain strings;
#  - db.begin() commits the work when the block exits without an error.
with db.begin() as tx:
    tx.execute(text("DROP TABLE IF EXISTS products"))
    # create schema
    tx.execute(text(schema_products))
    # insert data into table
    products_rows = df_products.to_sql('products', con=tx, if_exists='append', index=False)

# number of rows written (displayed as the cell output)
products_rows

32951

### Sellers

In [29]:
# Sellers CSV -> DataFrame
df_sellers = pd.read_csv(filepath_or_buffer='database/olist_sellers_dataset.csv')
# Inferred column dtypes, for comparison with the table schema declared in the next cell
df_sellers.dtypes

seller_id                 object
seller_zip_code_prefix     int64
seller_city               object
seller_state              object
dtype: object

In [30]:
schema_sellers = """
    CREATE TABLE sellers(
        seller_id                 TEXT,
        seller_zip_code_prefix    INTEGER,
        seller_city               TEXT,
        seller_state              TEXT
    )
"""

# Rebuild and load the table on a dedicated connection:
#  - DROP TABLE IF EXISTS makes the cell re-runnable (a bare CREATE TABLE raises
#    as soon as the table already exists in the database file);
#  - text() is required because SQLAlchemy 2.x no longer executes plain strings;
#  - db.begin() commits the work when the block exits without an error.
with db.begin() as tx:
    tx.execute(text("DROP TABLE IF EXISTS sellers"))
    # create schema
    tx.execute(text(schema_sellers))
    # insert data into table
    sellers_rows = df_sellers.to_sql('sellers', con=tx, if_exists='append', index=False)

# number of rows written (displayed as the cell output)
sellers_rows

3095

### Product Category Name

In [31]:
# Category names and their English translations (CSV -> DataFrame).
# The path literal was previously left unterminated ('database/), which is a
# SyntaxError, so this cell could not run.
# NOTE(review): file name restored from the standard Olist dataset layout —
# confirm it matches the file present in the local `database/` folder.
df_product_category_name = pd.read_csv('database/product_category_name_translation.csv')
# Inferred column dtypes, for comparison with the table schema declared in the next cell
df_product_category_name.dtypes

product_category_name            object
product_category_name_english    object
dtype: object

In [32]:
schema_product_category_name = """
    CREATE TABLE product_category_name(
        product_category_name            TEXT,
        product_category_name_english    TEXT
    )
"""

# Rebuild and load the table on a dedicated connection:
#  - DROP TABLE IF EXISTS makes the cell re-runnable (a bare CREATE TABLE raises
#    as soon as the table already exists in the database file);
#  - text() is required because SQLAlchemy 2.x no longer executes plain strings;
#  - db.begin() commits the work when the block exits without an error.
with db.begin() as tx:
    tx.execute(text("DROP TABLE IF EXISTS product_category_name"))
    # create schema
    tx.execute(text(schema_product_category_name))
    # insert data into table
    product_category_name_rows = df_product_category_name.to_sql('product_category_name', con=tx, if_exists='append', index=False)

# number of rows written (displayed as the cell output)
product_category_name_rows

71

# 4 - Check Tables

In [17]:
# Final check: list the tables now present in the SQLite file
query = """
    SELECT name
    FROM sqlite_master
    WHERE type = 'table'
"""

table = pd.read_sql_query(sql=query, con=conn)
table

Unnamed: 0,name
0,costumer
1,order_items
2,order_payments
3,order_reviews
4,orders
5,products
6,sellers
7,product_category_name
8,geolocation


In [13]:
# Close the shared connection first, then dispose of the engine
conn.close()
db.dispose()

In [14]:
# Show the engine repr to confirm which database URL it is bound to
db

Engine(sqlite:///db_olist.sqlite)