## Packages

In [1]:
import os
import sys
from urllib.parse import quote_plus

import boto3
import numpy as np
import pandas as pd

# Data storage
import psycopg2 as psql  # PostgreSQL DBs
from sqlalchemy import create_engine  # SQL helper

# Put the parent directory on the import path so the project-local modules
# imported in the next cell can be found (presumably keys / data_fcns live
# there -- confirm against the repo layout).
sys.path.append("..")

In [2]:
import keys
import data_fcns as dfc

## Options

In [3]:
# Show full cell contents instead of truncating long values.
# None is the supported "no limit" setting; the old -1 sentinel was deprecated
# in pandas 1.0 and is rejected by current pandas releases.
pd.set_option('display.max_colwidth', None)

## Load the official item factors DataFrame

In [4]:
item_factors_df = pd.read_pickle('support_data/item_factors_20190916.pkl')

In [5]:
item_factors_df.head()

Unnamed: 0,id,features
0,60,"[0.01743680238723755, 0.007148396223783493, 1.06563401222229, 0.04059157520532608, 0.019294701516628265, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.006994778756052256, 0.0, 0.0, 0.0, 0.017005732282996178, 0.0, 0.036391135305166245, 0.0, 0.0, 0.0, 0.2501244843006134, 0.0, 0.0, 0.0, 0.0]"
1,80,"[0.0, 0.0, 0.03249182552099228, 0.11323057115077972, 0.0, 0.0, 0.0, 0.7948412299156189, 0.0, 0.0, 0.0, 0.0, 0.23714490234851837, 0.03495769575238228, 0.29481297731399536, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.22936636209487915, 0.03450985625386238, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12904486060142517]"
2,110,"[0.3528019189834595, 0.0, 0.0, 0.0, 0.2717379629611969, 0.09929965436458588, 0.03336532041430473, 0.0, 0.0, 0.02282828837633133, 0.00937669724225998, 0.03452523052692413, 0.0, 0.0, 0.11529584974050522, 0.0, 0.0, 0.0, 0.12241517752408981, 0.01565462350845337, 0.13469548523426056, 0.003043871372938156, 0.856399416923523, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12838362157344818, 0.10266077518463135]"
3,200,"[0.014895367436110973, 0.32599085569381714, 0.09774906188249588, 0.05869003385305405, 0.016703536733984947, 0.0, 0.13506878912448883, 0.0, 0.0, 0.27643218636512756, 0.0, 0.03294535353779793, 0.0, 0.0, 0.18430395424365997, 0.9149331450462341, 0.13594630360603333, 0.0, 0.00893563311547041, 0.0, 0.0, 0.013972891494631767, 0.0, 0.0011329196859151125, 0.03825383633375168, 0.08892543613910675, 0.05580912530422211, 0.0, 0.0, 0.4417659640312195]"
4,240,"[0.0, 0.05928525701165199, 0.0, 0.0, 0.7101511359214783, 0.0, 0.08753978461027145, 0.0, 0.06151632219552994, 0.2771930396556854, 0.0, 0.007922252640128136, 0.0, 0.0, 0.3503260612487793, 0.14580319821834564, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.11093205213546753, 0.19919617474079132, 0.0, 0.3611432611942291, 0.0]"


## Get comics data from DB

In [6]:
# Path to the secrets file under the user's home directory.
# expanduser('~') is used rather than os.environ['HOME'] so the lookup does not
# raise KeyError on systems where HOME is not set.
secret_path_aws = os.path.join(os.path.expanduser('~'), '.secret',
                               'aws_ps_flatiron.json')

# Read the connection settings through the project-local `keys` helper.
aws_keys = keys.get_keys(secret_path_aws)
user = aws_keys['user']
ps = aws_keys['password']
host = aws_keys['host']
db = aws_keys['db_name']

# SQLAlchemy-style connection URL. The user name and password are
# percent-encoded so characters such as '@', ':' or '/' in a credential cannot
# be misread as URL delimiters. Do not display this value: it holds the password.
aws_ps_engine = ('postgresql://' + quote_plus(user) + ':' + quote_plus(ps)
                 + '@' + host + '/' + db)

# Setup PSQL connection
conn = psql.connect(
    database=db,
    user=user,
    password=ps,
    host=host,
    port='5432'
)

In [7]:
# Instantiate a cursor on the open connection; the query cells below execute
# and fetch through it.
cur = conn.cursor()

In [8]:
# Select every row and column of the comics table (this does not count
# records; the rows themselves are fetched in the next cell).
query = """
    SELECT * FROM comics 
"""

# Execute the query
cur.execute(query)

# End the transaction that the SELECT opened on this connection.
conn.commit()

In [9]:
# Load the query result into a DataFrame.
# Column names are taken from the cursor metadata and handed to the
# constructor, so an empty result set still yields a frame with the expected
# columns instead of failing with a length-mismatch ValueError on a later
# `.columns = ...` assignment.
comics_pdf = pd.DataFrame(
    cur.fetchall(),
    columns=[col.name for col in cur.description],
)

In [10]:
comics_pdf.head()

Unnamed: 0,comic_id,comic_title,img_url,lrg_img_url
0,235,Amber Blake (IDW),https://comrx.s3-us-west-2.amazonaws.com/covers/amber_blake.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/amber_blake.jpg
1,358,Armor Hunters Bloodshot (Other),https://comrx.s3-us-west-2.amazonaws.com/covers/armor_hunters_bloodshot.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/armor_hunters_bloodshot.jpg
2,500,A Year of Marvels Unbeatable (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers/a_year_of_marvels_unbeatable.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/a_year_of_marvels_unbeatable.jpg
3,711,Beef (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/beef.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/beef.jpg
4,726,Belladonna (Avatar),https://comrx.s3-us-west-2.amazonaws.com/covers/belladonna.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/belladonna.jpg


In [11]:
comics_pdf.shape

(7202, 4)

In [12]:
comics_pdf.head()

Unnamed: 0,comic_id,comic_title,img_url,lrg_img_url
0,235,Amber Blake (IDW),https://comrx.s3-us-west-2.amazonaws.com/covers/amber_blake.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/amber_blake.jpg
1,358,Armor Hunters Bloodshot (Other),https://comrx.s3-us-west-2.amazonaws.com/covers/armor_hunters_bloodshot.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/armor_hunters_bloodshot.jpg
2,500,A Year of Marvels Unbeatable (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers/a_year_of_marvels_unbeatable.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/a_year_of_marvels_unbeatable.jpg
3,711,Beef (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/beef.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/beef.jpg
4,726,Belladonna (Avatar),https://comrx.s3-us-west-2.amazonaws.com/covers/belladonna.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/belladonna.jpg


### Need to reduce to comics with counts >= 20

In [26]:
# Read the comic id list; the file is parsed as line-delimited JSON records.
# Presumably these are the ids that meet the "counts >= 20" threshold named in
# the section header -- confirm against whatever produced the file.
comic_ids = pd.read_json(
    'raw_data/comic_ids.json',
    orient='records',
    lines=True,
)

In [27]:
comic_ids.shape

(790, 1)

In [28]:
# Keep only the comics whose comic_id appears in comic_ids (inner join on the
# shared comic_id column).
comics_pdf_small = pd.merge(comics_pdf, comic_ids, how='inner', on='comic_id')

In [30]:
comics_pdf.shape

(7202, 4)

In [29]:
comics_pdf_small.shape

(790, 4)

In [31]:
comics_pdf_small.head()

Unnamed: 0,comic_id,comic_title,img_url,lrg_img_url
0,1316,Conan the Barbarian (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg
1,1634,Dceased (DC),https://comrx.s3-us-west-2.amazonaws.com/covers/dceased.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/dceased.jpg
2,2400,From Under Mountains (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/from_under_mountains.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/from_under_mountains.jpg
3,2986,Head Lopper (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/head_lopper.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/head_lopper.jpg
4,6348,Tmnt Ongoing (IDW),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg


## Merge item factors and comics

In [32]:
# Rename the id column so it matches the comic_id key used by the comics table.
# An explicit rename (rather than assigning a positional list to .columns)
# cannot mislabel columns if their order ever differs, and is a no-op when the
# cell is re-run.
item_factors_df = item_factors_df.rename(columns={'id': 'comic_id'})

In [33]:
# Attach title and cover URLs to each comic's factor vector (inner join on the
# shared comic_id column).
combo = pd.merge(item_factors_df, comics_pdf_small, how='inner', on='comic_id')

In [34]:
# Index the merged frame by comic_id.
# The frame is rebound rather than mutated in place, and the call is skipped
# when comic_id is already the index, so re-running this cell no longer raises
# KeyError.
if combo.index.name != 'comic_id':
    combo = combo.set_index('comic_id')

In [35]:
combo.head()

Unnamed: 0_level_0,features,comic_title,img_url,lrg_img_url
comic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
60,"[0.01743680238723755, 0.007148396223783493, 1.06563401222229, 0.04059157520532608, 0.019294701516628265, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.006994778756052256, 0.0, 0.0, 0.0, 0.017005732282996178, 0.0, 0.036391135305166245, 0.0, 0.0, 0.0, 0.2501244843006134, 0.0, 0.0, 0.0, 0.0]",8house (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/8house.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/8house.jpg
80,"[0.0, 0.0, 0.03249182552099228, 0.11323057115077972, 0.0, 0.0, 0.0, 0.7948412299156189, 0.0, 0.0, 0.0, 0.0, 0.23714490234851837, 0.03495769575238228, 0.29481297731399536, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.22936636209487915, 0.03450985625386238, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12904486060142517]",Action Comics Annual (DC),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg
110,"[0.3528019189834595, 0.0, 0.0, 0.0, 0.2717379629611969, 0.09929965436458588, 0.03336532041430473, 0.0, 0.0, 0.02282828837633133, 0.00937669724225998, 0.03452523052692413, 0.0, 0.0, 0.11529584974050522, 0.0, 0.0, 0.0, 0.12241517752408981, 0.01565462350845337, 0.13469548523426056, 0.003043871372938156, 0.856399416923523, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12838362157344818, 0.10266077518463135]",Adventure Time (Boom),https://comrx.s3-us-west-2.amazonaws.com/covers/adventure_time.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/adventure_time.jpg
200,"[0.014895367436110973, 0.32599085569381714, 0.09774906188249588, 0.05869003385305405, 0.016703536733984947, 0.0, 0.13506878912448883, 0.0, 0.0, 0.27643218636512756, 0.0, 0.03294535353779793, 0.0, 0.0, 0.18430395424365997, 0.9149331450462341, 0.13594630360603333, 0.0, 0.00893563311547041, 0.0, 0.0, 0.013972891494631767, 0.0, 0.0011329196859151125, 0.03825383633375168, 0.08892543613910675, 0.05580912530422211, 0.0, 0.0, 0.4417659640312195]",All Star Batman (DC),https://comrx.s3-us-west-2.amazonaws.com/covers/all_star_batman.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/all_star_batman.jpg
240,"[0.0, 0.05928525701165199, 0.0, 0.0, 0.7101511359214783, 0.0, 0.08753978461027145, 0.0, 0.06151632219552994, 0.2771930396556854, 0.0, 0.007922252640128136, 0.0, 0.0, 0.3503260612487793, 0.14580319821834564, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.11093205213546753, 0.19919617474079132, 0.0, 0.3611432611942291, 0.0]",American Vampire Anthology (Vertigo),https://comrx.s3-us-west-2.amazonaws.com/covers/american_vampire_anthology.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/american_vampire_anthology.jpg


In [36]:
comics_pdf.loc[comics_pdf['comic_id']==20]

Unnamed: 0,comic_id,comic_title,img_url,lrg_img_url
105,20,1 For $1 Conan the Barbarian (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg


In [37]:
combo.shape

(790, 4)

In [38]:
# Drop the small-cover URL column; only the large-cover URL is kept.
# Guarded for re-runs: a later cell relabels 'lrg_img_url' as 'img_url', so an
# unconditional drop executed after that point would delete the large-cover
# URL instead. While 'lrg_img_url' is still present, 'img_url' can only be the
# small-cover column; errors='ignore' covers the case where it is already gone.
if 'lrg_img_url' in combo.columns:
    combo = combo.drop(columns=['img_url'], errors='ignore')

In [39]:
combo.shape

(790, 3)

In [40]:
combo.head()

Unnamed: 0_level_0,features,comic_title,lrg_img_url
comic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
60,"[0.01743680238723755, 0.007148396223783493, 1.06563401222229, 0.04059157520532608, 0.019294701516628265, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.006994778756052256, 0.0, 0.0, 0.0, 0.017005732282996178, 0.0, 0.036391135305166245, 0.0, 0.0, 0.0, 0.2501244843006134, 0.0, 0.0, 0.0, 0.0]",8house (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/8house.jpg
80,"[0.0, 0.0, 0.03249182552099228, 0.11323057115077972, 0.0, 0.0, 0.0, 0.7948412299156189, 0.0, 0.0, 0.0, 0.0, 0.23714490234851837, 0.03495769575238228, 0.29481297731399536, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.22936636209487915, 0.03450985625386238, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12904486060142517]",Action Comics Annual (DC),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg
110,"[0.3528019189834595, 0.0, 0.0, 0.0, 0.2717379629611969, 0.09929965436458588, 0.03336532041430473, 0.0, 0.0, 0.02282828837633133, 0.00937669724225998, 0.03452523052692413, 0.0, 0.0, 0.11529584974050522, 0.0, 0.0, 0.0, 0.12241517752408981, 0.01565462350845337, 0.13469548523426056, 0.003043871372938156, 0.856399416923523, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12838362157344818, 0.10266077518463135]",Adventure Time (Boom),https://comrx.s3-us-west-2.amazonaws.com/covers_large/adventure_time.jpg
200,"[0.014895367436110973, 0.32599085569381714, 0.09774906188249588, 0.05869003385305405, 0.016703536733984947, 0.0, 0.13506878912448883, 0.0, 0.0, 0.27643218636512756, 0.0, 0.03294535353779793, 0.0, 0.0, 0.18430395424365997, 0.9149331450462341, 0.13594630360603333, 0.0, 0.00893563311547041, 0.0, 0.0, 0.013972891494631767, 0.0, 0.0011329196859151125, 0.03825383633375168, 0.08892543613910675, 0.05580912530422211, 0.0, 0.0, 0.4417659640312195]",All Star Batman (DC),https://comrx.s3-us-west-2.amazonaws.com/covers_large/all_star_batman.jpg
240,"[0.0, 0.05928525701165199, 0.0, 0.0, 0.7101511359214783, 0.0, 0.08753978461027145, 0.0, 0.06151632219552994, 0.2771930396556854, 0.0, 0.007922252640128136, 0.0, 0.0, 0.3503260612487793, 0.14580319821834564, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.11093205213546753, 0.19919617474079132, 0.0, 0.3611432611942291, 0.0]",American Vampire Anthology (Vertigo),https://comrx.s3-us-west-2.amazonaws.com/covers_large/american_vampire_anthology.jpg


In [41]:
# Expose the large-cover URL under the name 'img_url'.
# An explicit rename replaces the positional `.columns = [...]` assignment: it
# does not depend on column order and is a no-op when the cell is re-run.
# Fail loudly if the small-cover 'img_url' column has not been dropped yet,
# since renaming would then produce two columns with the same name.
if {'img_url', 'lrg_img_url'} <= set(combo.columns):
    raise ValueError(
        "combo still has both 'img_url' and 'lrg_img_url'; "
        "drop the small-cover 'img_url' column before renaming"
    )
combo = combo.rename(columns={'lrg_img_url': 'img_url'})

In [42]:
combo.head()

Unnamed: 0_level_0,features,comic_title,img_url
comic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
60,"[0.01743680238723755, 0.007148396223783493, 1.06563401222229, 0.04059157520532608, 0.019294701516628265, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.006994778756052256, 0.0, 0.0, 0.0, 0.017005732282996178, 0.0, 0.036391135305166245, 0.0, 0.0, 0.0, 0.2501244843006134, 0.0, 0.0, 0.0, 0.0]",8house (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/8house.jpg
80,"[0.0, 0.0, 0.03249182552099228, 0.11323057115077972, 0.0, 0.0, 0.0, 0.7948412299156189, 0.0, 0.0, 0.0, 0.0, 0.23714490234851837, 0.03495769575238228, 0.29481297731399536, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.22936636209487915, 0.03450985625386238, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12904486060142517]",Action Comics Annual (DC),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg
110,"[0.3528019189834595, 0.0, 0.0, 0.0, 0.2717379629611969, 0.09929965436458588, 0.03336532041430473, 0.0, 0.0, 0.02282828837633133, 0.00937669724225998, 0.03452523052692413, 0.0, 0.0, 0.11529584974050522, 0.0, 0.0, 0.0, 0.12241517752408981, 0.01565462350845337, 0.13469548523426056, 0.003043871372938156, 0.856399416923523, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12838362157344818, 0.10266077518463135]",Adventure Time (Boom),https://comrx.s3-us-west-2.amazonaws.com/covers_large/adventure_time.jpg
200,"[0.014895367436110973, 0.32599085569381714, 0.09774906188249588, 0.05869003385305405, 0.016703536733984947, 0.0, 0.13506878912448883, 0.0, 0.0, 0.27643218636512756, 0.0, 0.03294535353779793, 0.0, 0.0, 0.18430395424365997, 0.9149331450462341, 0.13594630360603333, 0.0, 0.00893563311547041, 0.0, 0.0, 0.013972891494631767, 0.0, 0.0011329196859151125, 0.03825383633375168, 0.08892543613910675, 0.05580912530422211, 0.0, 0.0, 0.4417659640312195]",All Star Batman (DC),https://comrx.s3-us-west-2.amazonaws.com/covers_large/all_star_batman.jpg
240,"[0.0, 0.05928525701165199, 0.0, 0.0, 0.7101511359214783, 0.0, 0.08753978461027145, 0.0, 0.06151632219552994, 0.2771930396556854, 0.0, 0.007922252640128136, 0.0, 0.0, 0.3503260612487793, 0.14580319821834564, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.11093205213546753, 0.19919617474079132, 0.0, 0.3611432611942291, 0.0]",American Vampire Anthology (Vertigo),https://comrx.s3-us-west-2.amazonaws.com/covers_large/american_vampire_anthology.jpg


## Create pickle file

In [43]:
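# Earlier (2019-08) export target, kept for reference only; the current export
# is the 20190916 file written in the next cell.
#combo.to_pickle('support_data/comics_factors_201908.pkl')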

In [44]:
# Persist the merged frame (index: comic_id; columns: features, comic_title,
# img_url) for later use.
combo.to_pickle('support_data/comics_factors_20190916.pkl')

## Test 2019.08.12 fix (compare comics_factors_201908.pkl with comics_factors.pkl)

In [63]:
# Read new pickle

In [64]:
# NOTE(review): this loads comics_factors_201908.pkl, not the
# comics_factors_20190916.pkl written earlier in this notebook -- confirm that
# this is the intended "new" file for the comparison.
cf_new = pd.read_pickle('support_data/comics_factors_201908.pkl')

In [65]:
# Read old pickle 

In [66]:
cf_old = pd.read_pickle('support_data/comics_factors.pkl')

In [67]:
cf_new.shape

(6028, 4)

In [68]:
cf_old.shape

(6028, 3)

In [69]:
cf_new.head()

Unnamed: 0_level_0,features,comic_title,img_url,lrg_img_url
comic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10,"[-0.7526867389678955, -0.21263617277145386, -1.1569229364395142, 0.19937847554683685, 0.16002951562404633]",13th Artifact One Sho (Topcow),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg
20,"[-0.3515812158584595, 0.4757572114467621, -1.2307846546173096, 0.5941579341888428, -0.11247903108596802]",1 For $1 Conan the Barbarian (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg
30,"[0.1806577742099762, -0.48153993487358093, -0.9925048351287842, -0.04510089382529259, -0.6085895299911499]",21st Century Tank Girl (Other),https://comrx.s3-us-west-2.amazonaws.com/covers/21st_century_tank_girl.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/21st_century_tank_girl.jpg
40,"[-0.06164746731519699, -0.23286470770835876, -0.20415398478507996, 0.3465690016746521, -1.2049529552459717]",4001 Ad (Other),https://comrx.s3-us-west-2.amazonaws.com/covers/4001_ad.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/4001_ad.jpg
50,"[-0.4028661251068115, -0.3713889420032501, -1.1736090183258057, 0.15386144816875458, 0.5422862768173218]",68 Homefront (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/68_homefront.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/68_homefront.jpg


In [70]:
cf_old.head()

Unnamed: 0_level_0,features,comic_title,img_url
comic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10,"[-0.7526867389678955, -0.21263617277145386, -1.1569229364395142, 0.19937847554683685, 0.16002951562404633]",13th Artifact One Sho (Topcow),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg
20,"[-0.3515812158584595, 0.4757572114467621, -1.2307846546173096, 0.5941579341888428, -0.11247903108596802]",1 For $1 Conan the Barbarian (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg
30,"[0.1806577742099762, -0.48153993487358093, -0.9925048351287842, -0.04510089382529259, -0.6085895299911499]",21st Century Tank Girl (Other),https://comrx.s3-us-west-2.amazonaws.com/covers/21st_century_tank_girl.jpg
40,"[-0.06164746731519699, -0.23286470770835876, -0.20415398478507996, 0.3465690016746521, -1.2049529552459717]",4001 Ad (Other),https://comrx.s3-us-west-2.amazonaws.com/covers/4001_ad.jpg
50,"[-0.4028661251068115, -0.3713889420032501, -1.1736090183258057, 0.15386144816875458, 0.5422862768173218]",68 Homefront (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/68_homefront.jpg
