In [None]:
import os
import re
import sys
from datetime import datetime

import pymysql
import requests
from tqdm.auto import tqdm

import pandas as pd

# if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
#     root = sys._MEIPASS
# else:
#     cur_dir = os.path.dirname(os.path.realpath('__file__'))
#     root = os.path.abspath(os.path.join(cur_dir, os.pardir, os.pardir))
#     src  = os.path.join(root, 'src')
#     sys.path.append(src)

from database.access import AccessDatabase
from crawling.crawler import get_url, json_iterator
# Date stamp in yymmdd form, computed once when this cell runs. It is later
# interpolated into the Amazon staging-table name in the Amazon price-sync
# query; the Ulta price-sync cell rebinds `today` to db_jangho.today.
today = datetime.today().strftime('%y%m%d')
# AccessDatabase handles keyed 'glamai' and 'jangho', shared by the cells below.
db_glamai = AccessDatabase('glamai')
db_jangho = AccessDatabase('jangho')

---
### Sephora Product Update

---
#### 1st) Refinement

In [None]:
# Run the refinement step; whatever update_refinement() returns is kept in `products`.
from sephora_product.refinement import Refinement
products = Refinement().update_refinement()

---
#### 2nd) Products By Subcategory

In [None]:
# Run the product-keyword update and keep its return value in `upload_df`.
# Note: the affiliate crawl cells further down rebind `upload_df`.
from sephora_product.product_keyword import ProductKeyword
upload_df = ProductKeyword().update_product_keyword()

In [None]:
# _upload_df = upload_df.copy()

# today_date = datetime.today().strftime('%Y-%m-%d')
# _upload_df.loc[_upload_df.update_date<today_date, 'is_use'] = 0
# db_glamai.create_table(_upload_df, 'sephora_product_keyword')

---
#### 3rd) Update Vertical data

In [None]:
# Run the vertical-data update. The return value is not assigned; as the
# cell's last expression it is shown as cell output when it is not None.
from sephora_product.vertical_data import VerticalData
VerticalData().update_vertical_data()

---
#### 4th) Update Best & New & Vegan & Organic

In [None]:
# Two updates run back to back: best-seller/new first, then the
# vegan/organic keywords. Neither return value is assigned.
from sephora_product.best_new import UpdateBestSellerNew
from sephora_product.keywords import SephoraVeganOrganic
UpdateBestSellerNew().update_best_new()
SephoraVeganOrganic().update_keywords()

---
#### 5th) Review Date

In [None]:
# update_review_date() returns a pair, unpacked into `new_product_list` and `data`.
# Note: `ReviewDate` is re-imported from a different module in the review
# section below, and `data` is rebound by later cells.
from sephora_product.review_date import ReviewDate
new_product_list, data = ReviewDate().update_review_date()

---
#### 6th) Insert product info

In [None]:
# Run the product-info insert and keep its return value in `result`
# (displayed by the next cell).
from sephora_product.insert_product_info import update_product_info
result = update_product_info()

In [None]:
# Display the current value of `result` (set by the cell above when run in order).
result

---
#### 7th) All product update

In [None]:
# Run the all-product update; its return value replaces the `data` bound by
# the Review Date cell.
from sephora_product.all_product_update import update_all_product
data = update_all_product()

---
### Search Keywords Update

In [None]:
# Update search keywords (return value kept in `total_df`), then call
# db_distinction(), whose return value is not assigned.
from sephora_keyword.search_keyword import update_search_keywords, db_distinction
total_df = update_search_keywords()
db_distinction()

---
### Sephora Review Update

---
#### 1st) Review Data

In [None]:
# Back up the review table through db_glamai._backup before running the
# review crawl in the next cell.
# NOTE(review): the effect of keep=True is defined by AccessDatabase — confirm.
table = 'sephora_txt_data_re'
db_glamai._backup(table_name=table, keep=True)

In [None]:
# Crawl review data. _crawling() returns a pair, unpacked into `txt_data`
# and `error`; the next cell loads both into DataFrames.
from sephora_review.review_data import ReviewData
txt_data, error = ReviewData()._crawling()

In [None]:
# Wrap the crawl output in DataFrames and print a short summary of the run.
columns = ['product_code', 'product_id', 'rating', 'skin_type', 'eye_color', 'skin_concerns', 'hair_color', 'skin_tone', 'age', 'title', 'txt_data', 'positive_count', 'write_time', 'regist_date']
rev_df = pd.DataFrame(txt_data, columns=columns)

error_df = pd.DataFrame(error, columns=['product_code', 'product_url', 'note'])
# One row per distinct `note` value, holding non-null counts of the other columns.
error_df_cnt = error_df.groupby('note').count()

print(
    f"product counts: {len(rev_df.product_code.unique())}\n"
    f"product review counts: {len(rev_df)}"
)

# Report each skip reason under its own note text instead of by row position:
# positional lookups raise IndexError when fewer than two distinct notes occur
# (including a run with no skipped products) and mislabel the counts whenever
# the set of notes differs from the two that were assumed.
if error_df_cnt.empty:
    print("skipped products: 0")
else:
    for note, note_cnt in error_df_cnt['product_code'].items():
        print(f"{note}: {note_cnt}")

---
#### 2nd) Review Date Update

In [None]:
# Run the review-date update from the review package; the return value
# overwrites the `result` bound in the product section.
# Note: this import rebinds `ReviewDate`, which the product section imported
# from sephora_product.review_date.
# NOTE(review): ReviewDate is imported from sephora_review.review_data here —
# confirm this is the intended module.
from sephora_review.review_data import ReviewDate
result = ReviewDate().update_review_date()

---
#### 3rd) Duplicate check

In [None]:
# Reference SQL only: this cell evaluates a string literal and executes
# nothing against the database. The statements are meant to be run by hand.
# NOTE(review): the SQL refers to `like_count`, while the column list used to
# build rev_df has `positive_count` — confirm the table schema.
# NOTE(review): the duplicate check groups by (product_code, txt_data,
# write_time) but also selects `like_count`, and the dedup additionally
# matches on `like_count`; confirm both use the intended key.
'''
/* replace */ 
UPDATE sephora_txt_data_re SET txt_data = REPLACE(txt_data, '.Not impressed.', '.') WHERE BINARY(txt_data) LIKE '%Not impressed.';

/* check duplicated */
select product_code, txt_data, write_time, like_count, count(*) as cnt
from sephora_txt_data_re
group by product_code, txt_data, write_time
having cnt > 1;

/* dedup */
delete t1 
from sephora_txt_data_re t1, sephora_txt_data_re t2
where 
t1.product_code = t2.product_code and 
t1.txt_data = t2.txt_data and
t1.write_time = t2.write_time and
t1.like_count = t2.like_count and
t1.pk < t2.pk;
'''

---
###


---
### Sephora Product Status 



In [None]:
from sephora_update.status import update_sephora_status

# Verticals to refresh, processed in this order.
verticals = [
    'face_base', 'eye', 'lip_color', 'moisturizers', 'cheek', 'treatments', 'masks',
    'eye_care', 'body_care', 'mens', 'fragrance_men', 'fragrance_women', 'wellness', 'cleansers',
]

# vertical name -> value returned by update_sephora_status for that vertical.
# Filled incrementally so partial results survive a failure mid-run.
status_data_dict = {}
for vertical in tqdm(verticals):
    status_data_df = update_sephora_status(vertical)
    status_data_dict[vertical] = status_data_df

In [None]:
# verticals = ['face_base', 'eye', 'lip_color', 'moisturizers', 'cheek', 'treatments', 'masks', 'eye_care', 'body_care', 'mens', 'fragrance_men', 'fragrance_women', 'wellness', 'cleansers']

# def update_sephora_status_v(vertical):
#     from sephora_update.status import get_status
    
#     table_name = f'{vertical}_product_info'
#     info_df = db_glamai.get_tbl(table_name, ['product_code', 'item_no', 'url', 'price', 'regist_date'])
#     info_df_dedup = info_df.drop_duplicates(subset=['product_code', 'item_no'], keep='first')
#     info_status = []
#     for info in tqdm(info_df_dedup.values):
#         product_code = info[0]
#         item_no = info[1]
#         url = info[2]
#         price_org = info[3]
#         regist_date = info[4]
#         price, is_use = get_status(url, item_no)
#         update_date = datetime.today()
#         if price is None:
#             price = price_org
#         info_status.append([product_code, item_no, url, price, is_use, regist_date, update_date])
        
#     upload_table = f'sephora_{vertical}_data_status'
#     columns = ['product_code', 'item_no', 'url', 'price', 'is_use', 'regist_date', 'update_date']
#     upload_df = pd.DataFrame(info_status, columns=columns)
#     # db_glamai.create_table(upload_df=upload_df, table_name=upload_table)
#     return upload_df

---
### Sephora Product Sale

In [None]:
from sephora_update.sales import update_sephora_sale

# Verticals to refresh, processed in this order.
verticals = [
    'face_base', 'eye', 'lip_color', 'moisturizers', 'cheek', 'treatments', 'masks',
    'eye_care', 'body_care', 'mens', 'fragrance_men', 'fragrance_women', 'wellness', 'cleansers',
]

# vertical name -> value returned by update_sephora_sale for that vertical.
# Filled incrementally so partial results survive a failure mid-run.
price_data_dict = {}
for vertical in tqdm(verticals):
    price_data = update_sephora_sale(vertical)
    price_data_dict[vertical] = price_data

In [None]:
# verticals = ['face_base', 'eye', 'lip_color', 'moisturizers', 'cheek', 'treatments', 'masks', 'eye_care', 'body_care', 'mens', 'fragrance_men', 'fragrance_women', 'wellness', 'cleansers']

# def update_sephora_sale_v(vertical):
#     # backup table
#     table_name = f'sephora_{vertical}_data_sale'
#     db_glamai._backup(table_name=table_name, keep=True)
    
#     from sephora_update.sales import UpdateProductSale
#     sale = UpdateProductSale()
    
#     sale.__conn__()
#     product_codes = sale.get_data(vertical)
#     sale.insert_data_new(vertical)

#     status_info, price_datas = [], []
#     for product_code in tqdm(product_codes):
#         price_data, status = sale.update_data(product_code, vertical)
        
#         status_info.append([product_code, status])
#         price_datas += price_data
#     sale.__close__()
#     print(f'{vertical} product status update 완료!')

#     return price_datas

---
### Affiliate price

---
#### Amazon update

In [None]:
from affiliate.amazon import get_data, _crawling, _upload

In [None]:
# Bind the Amazon helpers in this cell: affiliate.ulta exports the same three
# names, so relying on an earlier import would run whichever module was
# imported last when cells are executed out of order.
from affiliate.amazon import get_data, _crawling, _upload

# Crawl every row returned by get_data(). Rows for which _crawling returns
# None are not uploaded; their affiliate URL is collected in `error` instead.
df_amazon = get_data()
datas, error = [], []
for value in tqdm(df_amazon.values):
    data = _crawling(value)
    if data is None:
        # Index 3 of the row is what this cell treats as the affiliate URL.
        affiliate_url = value[3]
        error.append(affiliate_url)
    else:
        datas.append(data)
# _upload returns a pair; `upload_df` is what the next cell writes to the database.
crawling_df, upload_df = _upload(datas)

In [None]:
# Write `upload_df` to the Amazon staging table through db_jangho.
# `upload_df` is a name shared with the product-keyword and Ulta cells, so run
# this directly after the Amazon crawl cell.
db_jangho.create_table(upload_df=upload_df, table_name='affiliate_price_update_amazon')

In [None]:
# # sale_price == price

# today = '220914'
# # today = db_jangho.today
# query = f'''
# update jangho.affiliate_price_update_amazon_{today} as a
# join jangho.affiliate_price_update_amazon_{today} as b
# on a.product_code = b.product_code and a.item_no = b.item_no and a.affiliate_type = b.affiliate_type
# set a.sale_price = b.price
# where a.is_sale = 0 and a.is_use = 1 and a.sale_price = 0;'''

# conn, curs = db_jangho._connect()
# curs.execute(query)
# conn.commit()
# curs.close()
# conn.close()

In [None]:
# # is_use = 1

# today = '220914'
# # today = db_jangho.today
# query = f'''
# update affiliate_price_update_amazon_{today}
# set is_use = 1
# where is_use = 0 and price != 0;'''

# conn, curs = db_jangho._connect()
# curs.execute(query)
# conn.commit()
# curs.close()
# conn.close()

In [None]:
# Copy price/sale/use fields from the dated Amazon staging table into
# glamai.affiliate_price, matching rows on (product_code, item_no, affiliate_type).
# `today` is not set here: it is whatever the kernel currently holds (the
# setup cell's yymmdd stamp, or db_jangho.today if the Ulta sync cell ran first).
# NOTE(review): confirm it matches the suffix of the staging table being read.
query = f'''
update glamai.affiliate_price as a
join jangho.affiliate_price_update_amazon_{today} as b 
on a.product_code = b.product_code and a.item_no = b.item_no and a.affiliate_type = b.affiliate_type
set a.price = b.price, a.sale_price = b.sale_price, a.is_sale = b.is_sale, a.is_use = b.is_use, a.regist_date = b.regist_date, a.update_date = b.update_date;'''

conn, curs = db_jangho._connect()
try:
    curs.execute(query)
    conn.commit()
finally:
    # Always release the cursor and connection, even when the update fails
    # (for example because the dated staging table does not exist).
    curs.close()
    conn.close()

---
#### Ulta update

In [None]:
from affiliate.ulta import get_data, _crawling, _upload

In [None]:
# Bind the Ulta helpers in this cell: affiliate.amazon exports the same three
# names, so relying on an earlier import would run whichever module was
# imported last when cells are executed out of order.
from affiliate.ulta import get_data, _crawling, _upload

# Crawl every row returned by get_data(). Rows for which _crawling returns
# None are not uploaded; their affiliate URL is collected in `error` instead.
df_ulta = get_data()
datas, error = [], []
for value in tqdm(df_ulta.values):
    data = _crawling(value)
    if data is None:
        # Index 3 of the row is what this cell treats as the affiliate URL.
        affiliate_url = value[3]
        error.append(affiliate_url)
    else:
        datas.append(data)
# _upload returns a pair; `upload_df` is what the next cell writes to the database.
crawling_df, upload_df = _upload(datas)

In [None]:
# Write `upload_df` to the Ulta staging table through db_jangho.
# `upload_df` is a name shared with the product-keyword and Amazon cells, so run
# this directly after the Ulta crawl cell.
db_jangho.create_table(upload_df=upload_df, table_name='affiliate_price_update_ulta')

In [None]:
# today = '220913'
# # today = db_jangho.today
# query = f'''
# update jangho.affiliate_price_update_ulta_{today} as a
# join jangho.affiliate_price_update_ulta_{today} as b
# on a.product_code = b.product_code and a.item_no = b.item_no and a.affiliate_type = b.affiliate_type
# set a.sale_price = b.price
# where a.is_sale = 0 and a.is_use = 1 and a.sale_price = 0;'''

# conn, curs = db_jangho._connect()
# curs.execute(query)
# conn.commit()
# curs.close()
# conn.close()

In [None]:
# today = '220913'
# # today = db_jangho.today
# query = f'''
# update affiliate_price_update_ulta_{today}
# set is_use = 1
# where is_use = 0 and price != 0;'''

# conn, curs = db_jangho._connect()
# curs.execute(query)
# conn.commit()
# curs.close()
# conn.close()

In [None]:
# Copy price/sale/use fields from the dated Ulta staging table into
# glamai.affiliate_price, matching rows on (product_code, item_no, affiliate_type).
# The table suffix comes from db_jangho.today; this rebinds the notebook-wide
# `today` for any cell executed afterwards.
today = db_jangho.today
query = f'''
update glamai.affiliate_price as a
join jangho.affiliate_price_update_ulta_{today} as b 
on a.product_code = b.product_code and a.item_no = b.item_no and a.affiliate_type = b.affiliate_type
set a.price = b.price, a.sale_price = b.sale_price, a.is_sale = b.is_sale, a.is_use = b.is_use, a.regist_date = b.regist_date, a.update_date = b.update_date;'''

conn, curs = db_jangho._connect()
try:
    curs.execute(query)
    conn.commit()
finally:
    # Always release the cursor and connection, even when the update fails
    # (for example because the dated staging table does not exist).
    curs.close()
    conn.close()


---
### Table Upload 

In [None]:
# # glamai_youtube_urls
# df = pd.read_csv('/Users/yeonseosla/Downloads/glamai_youtube_total_220902_final.csv').iloc[:, 1:]
# df.loc[:, 'regist_date'] = datetime.now()
# db_glamai.create_table(df, 'glamai_youtube_urls')