## Import Amazon review data

In [1]:
import os
import numpy as np

In [2]:
import ast
import gzip
import json

import pandas as pd

def parse(path):
  """Yield one parsed record per non-blank line of a gzip-compressed file.

  Each line is expected to hold a single Python literal (a dict in the
  files loaded by this notebook). Lines are parsed with
  ``ast.literal_eval`` rather than ``eval`` so that a data file can never
  execute arbitrary code.

  Parameters
  ----------
  path : str
      Path to the ``.gz`` file.

  Yields
  ------
  object
      The literal parsed from each line.

  Raises
  ------
  ValueError, SyntaxError
      If a line is not a valid Python literal.
  """
  # The context manager closes the file once the generator is exhausted,
  # closed, or garbage-collected; the original never closed the handle.
  with gzip.open(path, 'rb') as g:
    for l in g:
      # Skip blank lines (e.g. a trailing newline at end of file).
      if not l.strip():
        continue
      # Python source defaults to UTF-8, which is what eval() assumed for bytes.
      yield ast.literal_eval(l.decode('utf-8'))

def getDF(path):
  """Load every record produced by ``parse(path)`` into a DataFrame.

  Records are numbered from 0 in the order they are yielded; that number
  becomes the row index, and the keys of each record become the columns.
  """
  numbered_records = dict(enumerate(parse(path)))
  return pd.DataFrame.from_dict(numbered_records, orient='index')

In [3]:
# Base directory for the relative paths built below: the kernel's current
# working directory at the time this cell runs.
wd = os.getcwd()

In [4]:
# The data files live in a "data" folder one level above the working directory.
data_dir = os.path.join(wd,"..","data")

In [5]:
# Load the gzipped review file into a DataFrame.
reviews_path = os.path.join(data_dir, "reviews_Grocery_and_Gourmet_Food.json.gz")
df = getDF(reviews_path)

# Add recall info

## Import list of ASINs that appear in both datasets

In [6]:
# The ASIN list files live in an "asins" folder one level above the working directory.
asins_dir = os.path.join(wd,"..","asins")

In [7]:
# Read the file into a list with one entry per line. The `with` block
# closes the file on exit, so no explicit close() is needed.
with open(os.path.join(asins_dir, "asin_intersection.txt")) as asin_file:
    asin_intersection = asin_file.read().splitlines()

## Create new dataframe with boolean "recalled" column

In [8]:
# Single-column frame of ASINs used as the merge key below. Duplicates are
# dropped so that a repeated line in the ASIN file cannot multiply review
# rows when this frame is merged with the reviews.
df_recall = pd.DataFrame({"asin": asin_intersection}).drop_duplicates()

In [9]:
# Outer-join the reviews with the ASIN list; indicator=True adds a "_merge"
# column recording whether each row matched on both sides.
rev_recall = df.merge(df_recall, on="asin", how="outer", indicator=True)

In [10]:
# A row is flagged as recalled when its ASIN was found in both frames.
rev_recall["recalled"] = rev_recall["_merge"].eq("both")

In [11]:
# The merge indicator is no longer needed once "recalled" has been derived.
# Use the `columns=` keyword: passing the axis positionally (drop(label, 1))
# was deprecated in pandas 1.3 and raises TypeError in pandas 2.0+.
rev_recall = rev_recall.drop(columns="_merge")

## Sort/Separate Data (if needed)

### Sort by recalled status 

In [12]:
# Descending sort on the boolean flag puts recalled (True) rows first.
rev_recall_sorted = rev_recall.sort_values("recalled", ascending=False)

### Put in two separate dataframes

In [13]:
# Rows not flagged as recalled. A boolean mask returns an empty frame when
# nothing matches, whereas groupby(...).get_group(False) raises KeyError.
nonrecall = rev_recall[~rev_recall["recalled"]]

In [14]:
# Rows flagged as recalled. A boolean mask returns an empty frame when
# nothing matches, whereas groupby(...).get_group(True) raises KeyError.
recall = rev_recall[rev_recall["recalled"]]

# Add metadata

In [15]:
# Load the gzipped metadata file into a DataFrame.
metadata_path = os.path.join(data_dir, "meta_Grocery_and_Gourmet_Food.json.gz")
metadata = getDF(metadata_path)

In [16]:
# Drop the "imUrl" and "related" columns before joining onto the reviews.
# Use the `columns=` keyword: passing the axis positionally (drop(labels, 1))
# was deprecated in pandas 1.3 and raises TypeError in pandas 2.0+.
metadata_col_subset = metadata.drop(columns=["imUrl", "related"])

In [17]:
# Left join: every review row is kept, with metadata columns attached by ASIN.
full_df = rev_recall.merge(metadata_col_subset, on="asin", how="left")

In [18]:
# Preview the first rows only instead of rendering the entire frame.
full_df.head(10)

Unnamed: 0,unixReviewTime,asin,reviewTime,helpful,overall,reviewText,reviewerID,summary,reviewerName,recalled,salesRank,categories,description,title,price,brand
0,1381449600,0657745316,"10 11, 2013","[0, 0]",5.0,"No sugar, no GMO garbage, no fillers that come...",A1ZQZ8RJS1XVTX,Best vanilla I've ever had,gsxrgirl,False,{'Grocery & Gourmet Food': 374004},[[Grocery & Gourmet Food]],This is real vanilla extract made with only 3 ...,100 Percent All Natural Vanilla Extract,,
1,1354752000,0700026444,"12 6, 2012","[1, 1]",5.0,"This is my absolute, undisputed favorite tea r...",A31W38VGZAUUM4,Terrific Tea!,FIFA Lvr,False,{'Grocery & Gourmet Food': 620307},[[Grocery & Gourmet Food]],"Silverpot Tea, Pure Darjeeling, is an exquisit...",Pure Darjeeling Tea: Loose Leaf,,
2,1385942400,1403796890,"12 2, 2013","[0, 0]",1.0,I ordered spongbob slippers and I got John Cen...,A3I0AV0UJX5OH0,grrrrrrr,Alicia b,False,,[[Grocery & Gourmet Food]],Must have for any WWE Fan\n \n \n \nFeaturing ...,WWE Kids Todler Velvet Slippers featuring John...,3.99,
3,1307836800,1403796890,"06 12, 2011","[0, 0]",3.0,The cart is fine and works for the purpose for...,A3QAAOLIXKV383,Storage on Wheels Cart,"Danny K. Tilley ""Dan Tilley""",False,,[[Grocery & Gourmet Food]],Must have for any WWE Fan\n \n \n \nFeaturing ...,WWE Kids Todler Velvet Slippers featuring John...,3.99,
4,1332547200,141278509X,"03 24, 2012","[1, 1]",5.0,This product by Archer Farms is the best drink...,AB1A5EGHHVA9M,The best drink mix,CHelmic,False,{'Grocery & Gourmet Food': 620322},[[Grocery & Gourmet Food]],Infused with Vitamins and Electrolytes Good So...,Archer Farms Strawberry Dragonfruit Drink Mix ...,,
5,1362268800,1453060375,"03 3, 2013","[0, 2]",1.0,Don't buy this item - rip off at this price. ...,A3DTB6RVENLQ9Q,Oops. Made a mistake and ordered this. I mis...,TR-RhodeIsland,False,{'Grocery & Gourmet Food': 268754},[[Grocery & Gourmet Food]],MiO Energy is your portable energy source givi...,Mio Energy Liquid Water Enhancer Black Cherry ...,11.99,Mio
6,1374019200,1453060464,"07 17, 2013","[0, 0]",3.0,My wife picked some of this up on sale. I usu...,A3LZA698SQPCXE,Tastes a Bit like Cough Syrup,"Trevor L ""God is on the side of the army with...",False,{'Grocery & Gourmet Food': 221057},[[Grocery & Gourmet Food]],With these Splash water flavor enhancers you a...,Splash Energy Liquid Water Enhancer 24 Serving...,,
7,1376956800,1453060782,"08 20, 2013","[1, 1]",4.0,I bought these on sale (2 for $4) at my local ...,A2XZPK86YY9R6G,"Decent flavor, value...meh.",Ixalmida,False,,[[Grocery & Gourmet Food]],With these Splash water flavor enhancers you a...,Splash Energy Liquid Water Enhancer 24 Serving...,6.39,
8,1391904000,1603112251,"02 9, 2014","[0, 0]",3.0,I had a martini at a local distillery that use...,A2MWO0CISKXJ9,Interesting bitters,"Mary S ""one800mary""",False,{'Grocery & Gourmet Food': 43972},[[Grocery & Gourmet Food]],Become a cocktail king with these unique bitte...,Cocktail Kingdom Wormwood Bitters - 5 oz,17.95,
9,1388534400,1613170416,"01 1, 2014","[0, 0]",5.0,"I bought these to be part of an Xmas gift, so ...",ACDUAY8AH3T72,Beautiful dice,"Antaeus Feldspar ""AF""",False,{'Toys & Games': 26935},"[[Grocery & Gourmet Food, Beverages, Coffee, C...",Twelve dice inspired by the Dresden Files nove...,Evil Hat Productions Fate Dice: Winter Knight,14.05,Evil Hat Productions
