In [1]:
import pandas as pd
import psycopg2
import os, io
from dotenv import load_dotenv    

In [3]:
query_products = """
SELECT id AS variant_id,
       catalog_category_id AS category_id,
       catalog_brand_id AS brand_id,
       status,
       is_new,
       reviews_count,
       reviews_average_score_price,
       reviews_average_score_quality,
       reviews_average_score_properties,
       reviews_average_score_overall,
       in_sets_count,
       is_in_stock,
       is_returnable,
       purchase_price,
       eshop_stock_count,
       country_of_origin_code,
       name
       
       FROM catalog_products
"""

In [4]:
load_dotenv()

True

In [5]:
def create_db_conn():
    try:
        conn = psycopg2.connect(host=os.getenv('DB_HOST'), database=os.getenv('DB_NAME'),
                                             user=os.getenv('DB_USER'), password=os.getenv('DB_PASSWORD'),
                                         port=int(os.getenv('DB_PORT')))
        return conn
    except psycopg2.DatabaseError as e:
        print(f'database connection {e}')
        return None
    except Exception as e:
        print(f'unknown error {e}')
        return None


def read_sql_iostream(query: str, block_mergejoin=False, block_hashjoin=False, block_seqscan=False) -> pd.DataFrame:
    """
    More effective way of loading content of database table to dataframe using io stream - StringIO.
    :param str query: Query select for accessing data in table.
    :param con: Connection to concrete database.
    :return pd.Dataframe: Output dataframe loaded from database.
    """
    try:
        con = create_db_conn()
        cur = con.cursor()
        copy_sql = f"COPY ({query.strip().rstrip(';')}) TO STDOUT WITH CSV HEADER"
        store = io.StringIO()
        cur.copy_expert(copy_sql, store)
        store.seek(0)
        df = pd.read_csv(store, na_values=['NULL', 'NaN', 'nan', 'null', ''], keep_default_na=False)
    except Exception as e:
        raise e
    finally:
        try:
            cur.close()
            con.close()
        except Exception as e:
            print(f'error- {e}')
            pass
    return df

In [6]:
# Check whether the data folder exists or not
if not os.path.exists('./data'):
    os.makedirs('./data')

In [33]:
query = """
    SELECT
        so.price_without_vat AS order_price_without_vat,
        so.price_with_vat AS order_price_with_vat,
        so.bill_country,
        so.setting_currency_id,
        so.created_at,
        so.shop_basket_id,
        so.doc_date,
        so.exchange_currency_rate,
        so.source_type AS source,

        sc.code AS currency_code,
        sc.currency_symbol,
        sc.price_round_system,


        sb.total_price_before_discount_with_vat AS basket_total_price_before_discount_with_vat,
        sb.total_price_with_vat AS basket_total_price_with_vat,
        sb.count_basket_items,
        sb.count_products AS basket_count_products,
        sb.basket_type,

        sbi.quantity AS item_quantity,
        sbi.item_type,
        sbi.unit_price_with_vat AS item_unit_price_with_vat,
        sbi.unit_price_without_vat AS item_unit_price_without_vat,
        sbi.total_discount_with_vat AS item_total_discount_with_vat,


        cp.id as product_id,
        cp.code AS product_code,
        cp.catalog_category_id,
        cp.catalog_brand_id,
        cp.name AS product_name,
        cp.status AS product_status,
        cp.reviews_count,
        cp.reviews_average_score_price,
        cp.reviews_average_score_quality,
        cp.reviews_average_score_properties,
        cp.reviews_average_score_overall,
        cp.reviews_average_score,
        cp.is_in_stock,
        cp.is_ended,
        cp.is_new,
        cp.is_boosted,
        cp.purchase_price AS product_purchase_price,
        cp.eshop_stock_count,
        cp.is_fifo,
        cp.name_parameterize AS product_name_parameterize,

        cc.name AS category,
        cc.tree_path,
        cc.name_parameterize AS category_name_parameterized,
        cc.status AS category_status,
        cc.catalog_segment_id,
        cc.ancestor_ids AS categories_ancestor_ids,
        cc.descendant_ids AS categories_descendant_ids,
        cc.full_name_path AS category_full_name_path,
        cc.default_warranty_period,

        cb.name AS brand_name,
        cb.name_parameterize AS brand_parameterized,

        cs.name AS segment_name,
        cs.name_parameterize AS segment_parameterezied,
        cs.status AS segment_status


    FROM shop_orders so
    LEFT JOIN setting_currencies sc ON so.setting_currency_id = sc.id
    INNER JOIN shop_baskets sb ON sb.id = so.shop_basket_id
    LEFT JOIN shop_basket_items sbi ON  sb.id = sbi.shop_basket_id
    INNER JOIN catalog_products cp ON cp.id = sbi.catalog_product_id
    LEFT JOIN catalog_categories cc ON cp.catalog_category_id = cc.id
    LEFT JOIN catalog_brands cb ON cp.catalog_brand_id = cb.id
    LEFT JOIN catalog_segments cs ON cs.id = cp.catalog_segment_id
    LIMIT 1000000
    OFFSET 3000000
    """
orders.append(read_sql_iostream(query))

In [8]:
orders.columns

Index(['order_price_without_vat', 'order_price_with_vat', 'bill_country',
       'setting_currency_id', 'created_at', 'shop_basket_id', 'doc_date',
       'exchange_currency_rate', 'source', 'currency_code', 'currency_symbol',
       'price_round_system', 'basket_total_price_before_discount_with_vat',
       'basket_total_price_with_vat', 'count_basket_items',
       'basket_count_products', 'basket_type', 'item_quantity', 'item_type',
       'item_unit_price_with_vat', 'item_unit_price_without_vat',
       'item_total_discount_with_vat', 'product_id', 'product_code',
       'catalog_category_id', 'catalog_brand_id', 'product_name',
       'product_status', 'reviews_count', 'reviews_average_score_price',
       'reviews_average_score_quality', 'reviews_average_score_properties',
       'reviews_average_score_overall', 'reviews_average_score', 'is_in_stock',
       'is_ended', 'is_new', 'is_boosted', 'product_purchase_price',
       'eshop_stock_count', 'is_fifo', 'product_name_paramete

In [15]:
orders

Unnamed: 0,order_price_without_vat,order_price_with_vat,bill_country,setting_currency_id,created_at,shop_basket_id,doc_date,exchange_currency_rate,source,currency_code,...,catalog_segment_id,categories_ancestor_ids,categories_descendant_ids,category_full_name_path,default_warranty_period,brand_name,brand_parameterized,segment_name,segment_parameterezied,segment_status
0,858.67000,1039.00,CZ,4,2021-05-22 07:16:21.641469,3005091,2021-05-22,25.5220,eshop,CZK,...,1,"{3,4,119,120}",{},"{Music,Guitars,""Guitar Strings"",""Electric Guit...",24,Ernie Ball,ernie-ball,Music,music,active
1,1995.04206,2414.00,CZ,4,2021-05-22 08:32:29.915039,2998301,2021-05-22,25.5220,eshop,CZK,...,1,"{3,204,252}",{},"{Music,""Bass guitars"",""Bass Effects and Pedals...",24,Zoom,zoom,Music,music,active
2,1995.04206,2414.00,CZ,4,2021-05-22 08:32:29.915039,2998301,2021-05-22,25.5220,eshop,CZK,...,1,"{3,4,185}",{},"{Music,Guitars,""Guitar Pickups"",""Humbucker Pic...",24,Partsland,partsland,Music,music,active
3,446.72000,545.00,IT,6,2021-05-22 09:08:43.769084,3005398,2021-05-22,1.0000,eshop,EUR,...,3,"{1182,1267}",{},"{Bikes,""Cycling accessories"",""Bicycle carriers""}",24,Thule,thule,Bike,bike,active
4,28280.32000,35920.00,HU,10,2021-05-22 09:09:49.706365,3005484,2021-05-22,357.4000,eshop,HUF,...,1,"{3,4,112}",{},"{Music,Guitars,Tuners,""Floor Pedal Tuners""}",24,Korg,korg,Music,music,active
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
658985,220.02000,264.02,SK,6,2020-06-26 11:47:33.633743,1423852,2020-06-26,1.0000,eshop,EUR,...,1,"{3,607,812,2059}",{},"{Music,""Studio / PA"",""Bags and Dustcovers"",""Ba...",24,Alto Professional,alto-professional,Music,music,active
658986,213.48000,256.17,SK,6,2020-06-26 12:06:26.611717,1423938,2020-06-26,1.0000,eshop,EUR,...,1,"{3,4,5}",{},"{Music,Guitars,""Electric Guitars"",""Super-ST Ty...",24,Schecter,schecter,Music,music,active
658987,556.91000,685.00,PL,12,2020-06-26 12:31:27.479337,1424015,2020-06-26,4.4504,eshop,PLN,...,1,"{3,607,711,712}",{},"{Music,""Studio / PA"",Microphones,""Condenser Mi...",24,Shure,shure,Music,music,active
658988,135.77000,166.99,PL,12,2020-06-26 13:36:44.233573,1424248,2020-06-26,4.4504,eshop,PLN,...,5,"{1761,1065,1066,1067}",{},"{""Audio Video Tech"",Other,Merchandise,""Outfit ...",24,AC/DC,ac-dc,Audio Video Tech,tech,active


In [10]:
products = read_sql_iostream(query_products)
products

Unnamed: 0,variant_id,category_id,brand_id,status,is_new,reviews_count,reviews_average_score_price,reviews_average_score_quality,reviews_average_score_properties,reviews_average_score_overall,in_sets_count,is_in_stock,is_returnable,purchase_price,eshop_stock_count,country_of_origin_code,name
0,250796,763.0,16.0,active,f,3,100.0,100.0,100.0,100.0,0,t,t,3.41575,40.0,CN,BSMC300
1,347982,3523.0,7754.0,active,t,0,0.0,0.0,0.0,0.0,0,f,t,82.59000,0.0,US,Regulus LV Red/Black 38
2,241059,2955.0,164.0,active,f,0,0.0,0.0,0.0,0.0,0,t,t,151.96000,2.0,CN,Evolve 30 - Case
3,4908,300.0,15.0,ended,f,2,90.0,100.0,80.0,0.0,0,f,t,,0.0,,RD-300SX
4,256397,1326.0,1576.0,active,f,0,0.0,0.0,0.0,0.0,0,f,t,67.80000,0.0,VN,Sirio 84 Black/White 295
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351546,75502,1131.0,872.0,ended,f,0,0.0,0.0,0.0,0.0,0,f,t,,0.0,VN,Boys Therma Top Hz University Red/White S
351547,78948,1131.0,921.0,ended,f,0,0.0,0.0,0.0,0.0,0,f,t,,0.0,CN,Dwayne Tour Insula Mens Sweater Kings Blue 3XL
351548,81765,1131.0,866.0,ended,f,0,0.0,0.0,0.0,0.0,0,f,t,,0.0,CN,1/4 Zip Blocked Mens Sweater Caviar XL
351549,162843,60.0,616.0,archived,f,0,0.0,0.0,0.0,0.0,0,f,t,17.08000,0.0,CN,Soprano Ukulele Set (B-Stock) #923506


In [11]:
orders.to_csv('data/data.csv', index=False)
products.to_csv('data/products.csv', index=False)

In [16]:
data =  pd.read_csv('data/data.csv', sep=',', parse_dates=['doc_date', 'created_at'], low_memory=False)

In [37]:
full_orders = pd.concat([orders[0], orders[1], orders[2], orders[3]])

In [38]:
full_orders.to_csv('data/data.csv', index=False)