## Packages

In [13]:
import os
import sys
from urllib.parse import quote_plus

import boto3
import numpy as np
import pandas as pd

# Data storage
import psycopg2 as psql  # PostgreSQL DBs
from sqlalchemy import create_engine  # SQL helper

# Add ".." (relative to the working directory) to the module search path.
# NOTE(review): presumably this is what lets the project-local `keys` and `data_fcns`
# modules imported in the next cell resolve — confirm.
sys.path.append("..")

In [14]:
import keys
import data_fcns as dfc

## Options

In [15]:
# Show the full contents of every cell instead of truncating long values
# (the URL columns displayed later in this notebook are long).
# `None` is the supported "no limit" setting; the legacy `-1` sentinel was deprecated in
# pandas 1.0 and is rejected by newer releases. Requires pandas >= 1.0.
pd.set_option('display.max_colwidth', None)

## Load the official item-factors DataFrame

In [16]:
# Load the pickled item-factor table; the path is relative to the working directory.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
item_factors_df = pd.read_pickle('support_data/item_factors.pkl')

In [17]:
# Preview the first rows to check the layout that was loaded.
item_factors_df.head()

Unnamed: 0,id,features
0,10,"[-0.7526867389678955, -0.21263617277145386, -1.1569229364395142, 0.19937847554683685, 0.16002951562404633]"
1,20,"[-0.3515812158584595, 0.4757572114467621, -1.2307846546173096, 0.5941579341888428, -0.11247903108596802]"
2,30,"[0.1806577742099762, -0.48153993487358093, -0.9925048351287842, -0.04510089382529259, -0.6085895299911499]"
3,40,"[-0.06164746731519699, -0.23286470770835876, -0.20415398478507996, 0.3465690016746521, -1.2049529552459717]"
4,50,"[-0.4028661251068115, -0.3713889420032501, -1.1736090183258057, 0.15386144816875458, 0.5422862768173218]"


## Get comics data from DB

In [18]:
# Location of the JSON file that holds the AWS PostgreSQL credentials.
secret_path_aws = os.path.join(os.environ['HOME'], '.secret',
                               'aws_ps_flatiron.json')

# Read the credentials through the project-local `keys` helper and unpack the
# fields needed below. Never display these values in a cell output.
aws_keys = keys.get_keys(secret_path_aws)
user = aws_keys['user']
ps = aws_keys['password']
host = aws_keys['host']
db = aws_keys['db_name']

# Connection URL string (despite the name, this is a URL, not an engine object).
# User and password are percent-encoded so that reserved characters such as
# '@', ':' or '/' inside them cannot corrupt the URL structure.
aws_ps_engine = ('postgresql://' + quote_plus(user) + ':' + quote_plus(ps)
                 + '@' + host + '/' + db)

# Setup PSQL connection
# psycopg2 receives the raw (unencoded) credentials as separate keyword arguments.
conn = psql.connect(
    database=db,
    user=user,
    password=ps,
    host=host,
    port='5432'
)

In [19]:
# Instantiate cursor on the open connection; the next cells use it to run the query
# and to read back the rows and column names.
cur = conn.cursor()

In [20]:
# Select every column of every row in the `comics` table
# (this fetches the records themselves; it does not count them).
query = """
    SELECT * FROM comics 
"""

# Execute the query
cur.execute(query)

# NOTE(review): the SELECT writes nothing, so this commit presumably only ends the
# transaction opened by execute — confirm it is needed.
conn.commit()

In [21]:
# Materialise the query result as a DataFrame, labelling the columns with the names
# reported by the cursor description.
# The labels are passed to the constructor (rather than assigned afterwards) so that an
# empty result set yields an empty frame with the right columns instead of raising a
# length-mismatch ValueError.
comics_pdf = pd.DataFrame(cur.fetchall(),
                          columns=[col.name for col in cur.description])

In [22]:
# Preview the first rows fetched from the comics table.
comics_pdf.head()

Unnamed: 0,comic_id,comic_title,img_url,lrg_img_url
0,235,Amber Blake (IDW),https://comrx.s3-us-west-2.amazonaws.com/covers/amber_blake.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/amber_blake.jpg
1,358,Armor Hunters Bloodshot (Other),https://comrx.s3-us-west-2.amazonaws.com/covers/armor_hunters_bloodshot.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/armor_hunters_bloodshot.jpg
2,500,A Year of Marvels Unbeatable (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers/a_year_of_marvels_unbeatable.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/a_year_of_marvels_unbeatable.jpg
3,711,Beef (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/beef.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/beef.jpg
4,726,Belladonna (Avatar),https://comrx.s3-us-west-2.amazonaws.com/covers/belladonna.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/belladonna.jpg


## Merge item factors and comics

In [23]:
# Relabel the item-factor columns positionally so the id column is called `comic_id`,
# the same key name the comics frame uses in the merge that follows.
item_factors_df.columns = ['comic_id', 'features']

In [24]:
# Inner-join the factor vectors with the comics metadata on their shared `comic_id` key;
# comics present in only one of the two frames are left out.
combo = pd.merge(item_factors_df, comics_pdf, how='inner', on='comic_id')

In [25]:
# Use `comic_id` as the row index of the merged frame.
combo = combo.set_index('comic_id')

In [26]:
# Preview the merged frame, now indexed by comic_id.
combo.head()

Unnamed: 0_level_0,features,comic_title,img_url,lrg_img_url
comic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10,"[-0.7526867389678955, -0.21263617277145386, -1.1569229364395142, 0.19937847554683685, 0.16002951562404633]",13th Artifact One Sho (Topcow),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg
20,"[-0.3515812158584595, 0.4757572114467621, -1.2307846546173096, 0.5941579341888428, -0.11247903108596802]",1 For $1 Conan the Barbarian (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg
30,"[0.1806577742099762, -0.48153993487358093, -0.9925048351287842, -0.04510089382529259, -0.6085895299911499]",21st Century Tank Girl (Other),https://comrx.s3-us-west-2.amazonaws.com/covers/21st_century_tank_girl.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/21st_century_tank_girl.jpg
40,"[-0.06164746731519699, -0.23286470770835876, -0.20415398478507996, 0.3465690016746521, -1.2049529552459717]",4001 Ad (Other),https://comrx.s3-us-west-2.amazonaws.com/covers/4001_ad.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/4001_ad.jpg
50,"[-0.4028661251068115, -0.3713889420032501, -1.1736090183258057, 0.15386144816875458, 0.5422862768173218]",68 Homefront (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/68_homefront.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/68_homefront.jpg


In [27]:
# Spot-check one title in the raw comics frame: the record whose id is 20.
is_comic_20 = comics_pdf['comic_id'] == 20
comics_pdf.loc[is_comic_20]

Unnamed: 0,comic_id,comic_title,img_url,lrg_img_url
105,20,1 For $1 Conan the Barbarian (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg


In [28]:
# (rows, columns) of the merged frame before any column is dropped.
combo.shape

(6028, 4)

In [30]:
# Discard the `img_url` column (the /covers/ links); `lrg_img_url` is kept.
combo = combo.drop(columns=['img_url'])

In [31]:
# Confirm the frame is one column narrower after the drop.
combo.shape

(6028, 3)

In [32]:
# Preview the frame after dropping the original `img_url` column.
combo.head()

Unnamed: 0_level_0,features,comic_title,lrg_img_url
comic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10,"[-0.7526867389678955, -0.21263617277145386, -1.1569229364395142, 0.19937847554683685, 0.16002951562404633]",13th Artifact One Sho (Topcow),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg
20,"[-0.3515812158584595, 0.4757572114467621, -1.2307846546173096, 0.5941579341888428, -0.11247903108596802]",1 For $1 Conan the Barbarian (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg
30,"[0.1806577742099762, -0.48153993487358093, -0.9925048351287842, -0.04510089382529259, -0.6085895299911499]",21st Century Tank Girl (Other),https://comrx.s3-us-west-2.amazonaws.com/covers_large/21st_century_tank_girl.jpg
40,"[-0.06164746731519699, -0.23286470770835876, -0.20415398478507996, 0.3465690016746521, -1.2049529552459717]",4001 Ad (Other),https://comrx.s3-us-west-2.amazonaws.com/covers_large/4001_ad.jpg
50,"[-0.4028661251068115, -0.3713889420032501, -1.1736090183258057, 0.15386144816875458, 0.5422862768173218]",68 Homefront (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/68_homefront.jpg


In [33]:
# Relabel the three remaining columns positionally: the former `lrg_img_url` column
# takes over the `img_url` name. The assignment fails if the frame does not have
# exactly three columns.
combo.columns = ['features', 'comic_title', 'img_url']

In [34]:
# Preview the final layout: features, comic_title, img_url (large-cover links).
combo.head()

Unnamed: 0_level_0,features,comic_title,img_url
comic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10,"[-0.7526867389678955, -0.21263617277145386, -1.1569229364395142, 0.19937847554683685, 0.16002951562404633]",13th Artifact One Sho (Topcow),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg
20,"[-0.3515812158584595, 0.4757572114467621, -1.2307846546173096, 0.5941579341888428, -0.11247903108596802]",1 For $1 Conan the Barbarian (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg
30,"[0.1806577742099762, -0.48153993487358093, -0.9925048351287842, -0.04510089382529259, -0.6085895299911499]",21st Century Tank Girl (Other),https://comrx.s3-us-west-2.amazonaws.com/covers_large/21st_century_tank_girl.jpg
40,"[-0.06164746731519699, -0.23286470770835876, -0.20415398478507996, 0.3465690016746521, -1.2049529552459717]",4001 Ad (Other),https://comrx.s3-us-west-2.amazonaws.com/covers_large/4001_ad.jpg
50,"[-0.4028661251068115, -0.3713889420032501, -1.1736090183258057, 0.15386144816875458, 0.5422862768173218]",68 Homefront (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/68_homefront.jpg


## Create pickle file

In [35]:
# Persist the merged, comic_id-indexed frame; the path is relative to the working directory.
combo.to_pickle('support_data/comics_factors_201908.pkl')

# Test 2019.08.12 fix: compare the new pickle with the old one

In [63]:
# Read new pickle

In [64]:
# Load the pickle written in the "Create pickle file" section of this notebook.
# NOTE(review): the saved outputs below report 4 columns for this frame, while the frame
# pickled to this same path earlier in the notebook has 3 — the outputs look stale;
# restart the kernel and run all cells to confirm.
cf_new = pd.read_pickle('support_data/comics_factors_201908.pkl')

In [65]:
# Read old pickle 

In [66]:
# Load the earlier pickle so it can be compared side by side with cf_new.
cf_old = pd.read_pickle('support_data/comics_factors.pkl')

In [67]:
# (rows, columns) of the newly written pickle.
cf_new.shape

(6028, 4)

In [68]:
# (rows, columns) of the old pickle, for comparison with cf_new.shape.
cf_old.shape

(6028, 3)

In [69]:
# Preview the new pickle's columns and first rows.
cf_new.head()

Unnamed: 0_level_0,features,comic_title,img_url,lrg_img_url
comic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10,"[-0.7526867389678955, -0.21263617277145386, -1.1569229364395142, 0.19937847554683685, 0.16002951562404633]",13th Artifact One Sho (Topcow),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg
20,"[-0.3515812158584595, 0.4757572114467621, -1.2307846546173096, 0.5941579341888428, -0.11247903108596802]",1 For $1 Conan the Barbarian (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg
30,"[0.1806577742099762, -0.48153993487358093, -0.9925048351287842, -0.04510089382529259, -0.6085895299911499]",21st Century Tank Girl (Other),https://comrx.s3-us-west-2.amazonaws.com/covers/21st_century_tank_girl.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/21st_century_tank_girl.jpg
40,"[-0.06164746731519699, -0.23286470770835876, -0.20415398478507996, 0.3465690016746521, -1.2049529552459717]",4001 Ad (Other),https://comrx.s3-us-west-2.amazonaws.com/covers/4001_ad.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/4001_ad.jpg
50,"[-0.4028661251068115, -0.3713889420032501, -1.1736090183258057, 0.15386144816875458, 0.5422862768173218]",68 Homefront (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/68_homefront.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/68_homefront.jpg


In [70]:
# Preview the old pickle's columns and first rows for comparison.
cf_old.head()

Unnamed: 0_level_0,features,comic_title,img_url
comic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10,"[-0.7526867389678955, -0.21263617277145386, -1.1569229364395142, 0.19937847554683685, 0.16002951562404633]",13th Artifact One Sho (Topcow),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg
20,"[-0.3515812158584595, 0.4757572114467621, -1.2307846546173096, 0.5941579341888428, -0.11247903108596802]",1 For $1 Conan the Barbarian (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg
30,"[0.1806577742099762, -0.48153993487358093, -0.9925048351287842, -0.04510089382529259, -0.6085895299911499]",21st Century Tank Girl (Other),https://comrx.s3-us-west-2.amazonaws.com/covers/21st_century_tank_girl.jpg
40,"[-0.06164746731519699, -0.23286470770835876, -0.20415398478507996, 0.3465690016746521, -1.2049529552459717]",4001 Ad (Other),https://comrx.s3-us-west-2.amazonaws.com/covers/4001_ad.jpg
50,"[-0.4028661251068115, -0.3713889420032501, -1.1736090183258057, 0.15386144816875458, 0.5422862768173218]",68 Homefront (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/68_homefront.jpg
