### Step PS1: Get the Personalize boto3 Client

In [2]:
import boto3

import json
import numpy as np
import pandas as pd
import time

!wget -N https://s3-us-west-2.amazonaws.com/personalize-cli-json-models/personalize.json
!wget -N https://s3-us-west-2.amazonaws.com/personalize-cli-json-models/personalize-runtime.json
!aws configure add-model --service-name personalize --service-model file://personalize.json
!aws configure add-model --service-name personalize-runtime --service-model file://personalize-runtime.json

personalize = boto3.client('personalize')
personalize_runtime = boto3.client('personalize-runtime')

--2019-03-10 23:18:39--  https://s3-us-west-2.amazonaws.com/personalize-cli-json-models/personalize.json
Resolving s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)... 52.218.196.56
Connecting to s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)|52.218.196.56|:443... connected.
HTTP request sent, awaiting response... 304 Not Modified
File ‘personalize.json’ not modified on server. Omitting download.

--2019-03-10 23:18:40--  https://s3-us-west-2.amazonaws.com/personalize-cli-json-models/personalize-runtime.json
Resolving s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)... 52.218.196.56
Connecting to s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)|52.218.196.56|:443... connected.
HTTP request sent, awaiting response... 304 Not Modified
File ‘personalize-runtime.json’ not modified on server. Omitting download.



### Step PS2: Create a Bucket and Specify Data Output Location

In [2]:
sts = boto3.client('sts')
account_id = sts.get_caller_identity().get('Account')

s3 = boto3.client('s3')
bucket = '{}-product-recommendation-personalize'.format(account_id)
s3.create_bucket(Bucket=bucket)

filename = "DEMO-movie-lens-100k.csv"

### Step PS3: Download, Prepare, and Upload Training Data

#### Download and Explore the Dataset

In [3]:
!wget -N http://files.grouplens.org/datasets/movielens/ml-100k.zip
!unzip -o ml-100k.zip
data = pd.read_csv('./ml-100k/u.data', sep='\t', names=['USER_ID', 'ITEM_ID', 'RATING', 'TIMESTAMP'])
pd.set_option('display.max_rows', 5)
data

--2019-03-10 09:50:49--  http://files.grouplens.org/datasets/movielens/ml-100k.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.34.235
Connecting to files.grouplens.org (files.grouplens.org)|128.101.34.235|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4924029 (4.7M) [application/zip]
Saving to: ‘ml-100k.zip’


2019-03-10 09:50:50 (20.9 MB/s) - ‘ml-100k.zip’ saved [4924029/4924029]

Archive:  ml-100k.zip
   creating: ml-100k/
  inflating: ml-100k/allbut.pl       
  inflating: ml-100k/mku.sh          
  inflating: ml-100k/README          
  inflating: ml-100k/u.data          
  inflating: ml-100k/u.genre         
  inflating: ml-100k/u.info          
  inflating: ml-100k/u.item          
  inflating: ml-100k/u.occupation    
  inflating: ml-100k/u.user          
  inflating: ml-100k/u1.base         
  inflating: ml-100k/u1.test         
  inflating: ml-100k/u2.base         
  inflating: ml-100k/u2.test         
  inflating: ml-100k/u3.base    

Unnamed: 0,USER_ID,ITEM_ID,RATING,TIMESTAMP
0,196,242,3,881250949
1,186,302,3,891717742
...,...,...,...,...
99998,13,225,2,882399156
99999,12,203,3,879959583


#### Prepare and Upload Data

In [4]:
data = data[data['RATING'] > 3.6]                # keep only movies rated 3.6 and above
data = data[['USER_ID', 'ITEM_ID', 'TIMESTAMP']] # select columns that match the columns in the schema below
data.to_csv(filename, index=False)

boto3.Session().resource('s3').Bucket(bucket).Object(filename).upload_file(filename)

### Step PS4: Create Schema

In [None]:
schema = {
    "type": "record",
    "name": "Interactions",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {
            "name": "USER_ID",
            "type": "string"
        },
        {
            "name": "ITEM_ID",
            "type": "string"
        },
        {
            "name": "TIMESTAMP",
            "type": "long"
        }
    ],
    "version": "1.0"
}

create_schema_response = personalize.create_schema(
    name = "movie100-schema",
    schema = json.dumps(schema)
)

schema_arn = create_schema_response['schemaArn']
print(json.dumps(create_schema_response, indent=2))


### Step PS5: Create and Wait for Dataset Group

#### Create Dataset Group

In [None]:
create_dataset_group_response = personalize.create_dataset_group(
    name = "movie100-dataset-group"
)

dataset_group_arn = create_dataset_group_response['datasetGroupArn']
print json.dumps(create_dataset_group_response, indent=2)

#### Wait for Dataset Group to Have ACTIVE Status

In [None]:
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_group_response = personalize.describe_dataset_group(
        datasetGroupArn = dataset_group_arn
    )
    status = describe_dataset_group_response["datasetGroup"]["status"]
    print "DatasetGroup: {}".format(status)
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

### Step PS6: Create Dataset

In [None]:
dataset_type = "INTERACTIONS"
dataset_name = "ratings"
create_dataset_response = personalize.create_dataset(
    name = dataset_name,
    datasetType = dataset_type,
    datasetGroupArn = dataset_group_arn,
    schemaArn = schema_arn
)

dataset_arn = create_dataset_response['datasetArn']
print json.dumps(create_dataset_response, indent=2)

### Step PS7: Prepare, Create, and Wait for Dataset Import Job

#### Attach policy to S3 bucket

In [5]:
s3 = boto3.client("s3")

policy = {
    "Version": "2012-10-17",
    "Id": "PersonalizeS3BucketAccessPolicy",
    "Statement": [
        {
            "Sid": "PersonalizeS3BucketAccessPolicy",
            "Effect": "Allow",
            "Principal": {
                "Service": "personalize.amazonaws.com"
            },
            "Action": [
                "s3:GetObject",
                "s3:ListBucket"
            ],
            "Resource": [
                "arn:aws:s3:::{}".format(bucket),
                "arn:aws:s3:::{}/*".format(bucket)
            ]
        }
    ]
}

s3.put_bucket_policy(Bucket=bucket, Policy=json.dumps(policy));

#### Create S3 Read Only Access Role

In [6]:
iam = boto3.client("iam")

role_name = "PersonalizeS3Role"
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
          "Effect": "Allow",
          "Principal": {
            "Service": "personalize.amazonaws.com"
          },
          "Action": "sts:AssumeRole"
        }
    ]
}

create_role_response = iam.create_role(
    RoleName = role_name,
    AssumeRolePolicyDocument = json.dumps(assume_role_policy_document)
);

iam.attach_role_policy(
    RoleName = role_name,
    PolicyArn = "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
);

role_arn = create_role_response["Role"]["Arn"]
print role_arn

ClientError: An error occurred (AccessDenied) when calling the CreateRole operation: User: arn:aws:sts::344028372807:assumed-role/MachineLearningWorkshopSagemakerRole/SageMaker is not authorized to perform: iam:CreateRole on resource: arn:aws:iam::344028372807:role/PersonalizeS3Role

#### Create Dataset Import Job

In [None]:
create_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName = "movie100-dataset-import-job",
    datasetArn = dataset_arn,
    dataSource = {
        "dataLocation": "s3://{}/{}".format(bucket, filename)
    },
    roleArn = "arn:aws:iam::349934754982:role/PersonalizeS3Role"
)

dataset_import_job_arn = create_dataset_import_job_response['datasetImportJobArn']
print json.dumps(create_dataset_import_job_response, indent=2)

#### Wait for Dataset Import Job and Dataset Import Job Run to Have ACTIVE Status

In [None]:
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = dataset_import_job_arn
    )
    
    dataset_import_job = describe_dataset_import_job_response["datasetImportJob"]
    if "latestDatasetImportJobRun" not in dataset_import_job:
        status = dataset_import_job["status"]
        print "DatasetImportJob: {}".format(status)
    else:
        status = dataset_import_job["latestDatasetImportJobRun"]["status"]
        print "LatestDatasetImportJobRun: {}".format(status)
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

### Step PS8: Select Recipe

In [None]:
recipe_list = [
    "arn:aws:personalize:::recipe/awspersonalizehrnnmodel",
    "arn:aws:personalize:::recipe/awspersonalizedeepfmmodel",
    "arn:aws:personalize:::recipe/awspersonalizesimsmodel",
    "arn:aws:personalize:::recipe/awspersonalizeffnnmodel",
    "arn:aws:personalize:::recipe/popularity-baseline"
]

recipe_arn = recipe_list[0]
print recipe_arn

### Step PS9: Create and Wait for Solution

#### Create Solution

In [None]:
create_solution_response = personalize.create_solution(
    name = "movie100-solution",
    datasetGroupArn = dataset_group_arn,
    recipeArn = recipe_arn,
    minProvisionedTPS = 1
)

solution_arn = create_solution_response['solutionArn']
print json.dumps(create_solution_response, indent=2)

#### Wait for Solution to Have ACTIVE Status

In [None]:
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_solution_response = personalize.describe_solution(
        solutionArn = solution_arn
    )
    status = describe_solution_response["solution"]["status"]
    print "Solution: {}".format(status)
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

#### Get Metrics of Solution

In [None]:
get_metrics_response = personalize.get_metrics(
    solutionArn = solution_arn
)

print json.dumps(get_metrics_response, indent=2)

describe_solution_response = personalize.describe_solution(
    solutionArn = solution_arn
)
latest_solution_version_arn = describe_solution_response['solution']['latestSolutionVersion']['solutionVersionArn']
print(latest_solution_version_arn)

### Step PS10: Create and Wait for Campaign

#### Create campaign

In [None]:
create_campaign_response = personalize.create_campaign(
    name = "movie100-campaign",
    solutionVersionArn = latest_solution_version_arn,
    updateMode = "MANUAL"
)

campaign_arn = create_campaign_response['campaignArn']
print json.dumps(create_campaign_response, indent=2)

#### Wait for Campaign to Have ACTIVE Status

In [None]:
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = campaign_arn
    )
    status = describe_campaign_response["campaign"]["status"]
    print "Campaign: {}".format(status)
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(60)

### Step PS11: Get Recommendations

#### Select a User and an Item

In [5]:
items = pd.read_csv('./ml-100k/u.item', sep='|', usecols=[0,1], header=None)
items.columns = ['ITEM_ID', 'TITLE']

user_id = 344
print "USER: {}".format(user_id)

items

USER: 344


Unnamed: 0,ITEM_ID,TITLE
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)
5,6,Shanghai Triad (Yao a yao yao dao waipo qiao) ...
6,7,Twelve Monkeys (1995)
7,8,Babe (1995)
8,9,Dead Man Walking (1995)
9,10,Richard III (1995)


#### Call GetRecommendations

In [6]:
get_recommendations_response = personalize_runtime.get_recommendations(
    campaignArn = 'arn:aws:personalize:us-east-1:344028372807:campaign/movie100-campaign', #campaign_arn,
    userId = str(user_id)
)

item_list = get_recommendations_response['itemList']
title_list = [items.loc[items['ITEM_ID'] == np.int(item['itemId'])].values[0][-1] for item in item_list]

print "Recommendations: {}".format(json.dumps(title_list, indent=2))

Recommendations: [
  "Richard III (1995)", 
  "Everyone Says I Love You (1996)", 
  "Nosferatu (Nosferatu, eine Symphonie des Grauens) (1922)", 
  "Old Yeller (1957)", 
  "My Best Friend's Wedding (1997)", 
  "Mad City (1997)", 
  "Private Parts (1997)", 
  "Strange Days (1995)", 
  "Birds, The (1963)", 
  "Canadian Bacon (1994)", 
  "Raise the Red Lantern (1991)", 
  "Ghost in the Shell (Kokaku kidotai) (1995)", 
  "Spawn (1997)", 
  "Terminator, The (1984)", 
  "Desperate Measures (1998)", 
  "Dead Man (1995)", 
  "Return of the Jedi (1983)", 
  "McHale's Navy (1997)", 
  "One Fine Day (1996)", 
  "Boomerang (1992)", 
  "Close Shave, A (1995)", 
  "Face/Off (1997)", 
  "Star Trek IV: The Voyage Home (1986)", 
  "Letter From Death Row, A (1998)", 
  "Sleepers (1996)"
]
