In [73]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import pandas as pd
import pickle
import sagemaker
from sagemaker import get_execution_role
from sagemaker.sklearn.estimator import SKLearn

In [2]:
sagemaker_session = sagemaker.Session()
role = get_execution_role()

In [30]:
FRAMEWORK_VERSION = "0.23-1"
script_path = "train.py"
sklearn = SKLearn(
    entry_point=script_path,
    framework_version=FRAMEWORK_VERSION,
    instance_type="ml.c4.xlarge",
    role=role,
    sagemaker_session=sagemaker_session,
    hyperparameters={},
    
)

In [31]:
sklearn.fit()

2021-05-20 23:11:12 Starting - Starting the training job...
2021-05-20 23:11:35 Starting - Launching requested ML instancesProfilerReport-1621552271: InProgress
......
2021-05-20 23:12:35 Starting - Preparing the instances for training......
2021-05-20 23:13:41 Downloading - Downloading input data...
2021-05-20 23:14:07 Training - Downloading the training image...
2021-05-20 23:14:42 Uploading - Uploading generated training model
2021-05-20 23:14:42 Completed - Training job completed
[34m2021-05-20 23:14:29,892 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-05-20 23:14:29,894 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-05-20 23:14:29,904 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-05-20 23:14:30,276 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-05-20 23:14:30,289 sagemaker-training-to

In [32]:
predictor = sklearn.deploy(initial_instance_count=1, instance_type="ml.m5.xlarge")

-------------!

In [33]:
import os
import io
import boto3
import json
import csv
ENDPOINT_NAME = 'sagemaker-scikit-learn-2021-05-20-23-14-54-193'

In [34]:
runtime= boto3.client('runtime.sagemaker')

In [76]:
categ = np.array(['C','S','H','E','B'])#['Clothing','Shoes','Handbags','Electronics','Books']
d = {'0women1':[(0.5,50),(0.2,50),(0.05,50),(0.15,500),(0.1,30)],
     '0women2':[(0.45,150),(0.25,150),(0.15,200),(0.1,900),(0.05,30)],
     '0women3':[(0.3,50),(0.3,50),(0.2,100),(0.1,500),(0.1,30)],
     '1men1':[(0.1,50),(0.5,100),(0.01,50),(0.3,500),(0.09,30)],
     '1men2':[(0.1,100),(0.35,150),(0.01,50),(0.5,900),(0.04,30)],
     '1men3':[(0.1,50),(0.35,50),(0.05,50),(0.3,500),(0.2,30)]}


def make_data(num):
    data = {'gender':[],'age':[],'category':[], 'price':[]}
    age = {'1':(10,25),'2':(25,50),'3':(50,70)}
    purchased = []
    for k in d:
        for i in range(len(categ)):
            c = d[k][i]
            N = int(num*c[0])
            nn = (num-N)//2
            data['gender']+=([int(k[0])]*2*(nn+N))
            data['category']+=([categ[i]]*2*(nn+N))
            purchased+=([1]*2*N)
            data['age']+=list(np.random.randint(age[k[-1]][0],age[k[-1]][1], size=2*N))
            data['price']+= list(np.random.randint(c[1]*0.5,c[1]*1.5, size=2*N))
            purchased+=([0]*2*nn)
            data['age']+=list(np.random.randint(age[k[-1]][0],age[k[-1]][1], size=2*nn))
            data['price']+= list(np.random.randint(1,c[1]*0.4, size=nn))
            data['price']+= list(np.random.randint(c[1]*1.6,c[1]*3, size=nn))

    df = pd.DataFrame.from_dict(data)
    df = pd.get_dummies(df)
    return df,purchased
test_X, test_y = make_data(100)

In [44]:
response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                       ContentType='text/csv',
                                       Body=test_X.to_csv(header=False, index=False).encode("utf-8"))

In [45]:
result = json.loads(response['Body'].read().decode())

In [49]:
np.array(result)[:,1]

array([0.64981356, 0.64546752, 0.66764762, ..., 0.23194844, 0.22775755,
       0.26298402])

In [29]:
#boto3.client("sagemaker").delete_endpoint(EndpointName=ENDPOINT_NAME)

{'ResponseMetadata': {'RequestId': '0febe935-eb10-462d-b914-287de6cbe006',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '0febe935-eb10-462d-b914-287de6cbe006',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Thu, 20 May 2021 23:10:45 GMT'},
  'RetryAttempts': 0}}

In [50]:
data = {
    "username": "abc@abc.abc",
    "data": [
        {
            "_id": "60998936a9de4e00112fa196",
            "category": "book",
            "name": "flow",
            "description": "brand new",
            "price": 12,
            "seller": "0xE9ABC5FDb983f371fd76F20d40da7892b7f8b380",
            "longitude": 123,
            "latitude": 123,
            "__v": 0
        },
        {
            "_id": "60",
            "category": "electronic",
            "name": "flow",
            "description": "brand new",
            "price": 500,
            "seller": "0xE9ABC5FDb983f371fd76F20d40da7892b7f8b380",
            "longitude": 123,
            "latitude": 123,
            "__v": 0
        }
    ]
}

In [59]:
json2 = [{"_id":"1",
         "category":"book","name":"flow","description":"brand new","price":22,
         "seller":"0xE9ABC5FDb983f371fd76F20d40da7892b7f8b380","longitude":123,"latitude":123,"__v":0},
        {"_id":"2",
         "category":"clothing","name":"flow","description":"brand new","price":50,
         "seller":"0xE9ABC5FDb983f371fd76F20d40da7892b7f8b380","longitude":123,"latitude":123,"__v":0},
       {"_id":"3",
         "category":"electronic","name":"flow","description":"brand new","price":1000,
         "seller":"0xE9ABC5FDb983f371fd76F20d40da7892b7f8b380","longitude":123,"latitude":123,"__v":0},
        {"_id":"4",
 "category":"shoes","name":"flow","description":"brand new","price":127,
 "seller":"0xE9ABC5FDb983f371fd76F20d40da7892b7f8b380","longitude":123,"latitude":123,"__v":0},
        {"_id":"5",
 "category":"clothing","name":"flow","description":"brand new","price":120,
 "seller":"0xE9ABC5FDb983f371fd76F20d40da7892b7f8b380","longitude":123,"latitude":123,"__v":0},
        {"_id":"6",
 "category":"handbags","name":"flow","description":"brand new","price":199,
 "seller":"0xE9ABC5FDb983f371fd76F20d40da7892b7f8b380","longitude":123,"latitude":123,"__v":0},
       ]

In [60]:
users = (('1', 'abc', 'Molly', 'li', 'female', '20'),('2', 'abc', 'Charlie', 'li', 'male', '20'),
         ('3', 'abc', 'Crystal', 'li', 'female', '35'),('4', 'abc', 'Mike', 'li', 'male', '35'),
        ('5', 'abc', 'Emily', 'li', 'female', '60'),('6', 'abc', 'John', 'li', 'male', '60'))

In [65]:
import os
import io
import boto3
import json
import csv
import subprocess
import sys
subprocess.check_call([sys.executable, "-m", "pip", "install", 'pymysql'])

import pymysql
# grab environment variables
ENDPOINT_NAME = 'sagemaker-scikit-learn-2021-05-20-23-14-54-193'
runtime= boto3.client('runtime.sagemaker')

def lambda_handler(event, context):
    print("Received event: " + json.dumps(event, indent=2))
    data = json.loads(json.dumps(event))
    user_id = data['username']
    item = data['data']
    conn = pymysql.connect(
        host = "cc-instance.c75aookn21ch.us-east-2.rds.amazonaws.com",
        port = 3306,
        user = "admin",
        password = "Zj19660412!",
        db = "cloudComputing",
    )
    cur=conn.cursor()
    cur.execute("SELECT * FROM Users WHERE username=%s",user_id)
    user = cur.fetchall()[0]
    
    items, categories, prices = [], [], []
    for i in item:
        items.append(i['_id'])
        categories.append(i['category'])
        prices.append(i['price']*int(2779))
    X = pd.DataFrame(columns=['gender', 'age', 'price', 'category_B', 'category_C', 'category_E',
       'category_H', 'category_S'])
    X['price'] = prices
    emp = np.zeros(len(items),int)
    X['category_B'],X['category_C'],X['category_E'],X['category_H'],X['category_S'] = emp,emp,emp,emp,emp
    for i in range(len(categories)):
        X['category_'+categories[i][0].upper()].iloc[i] = 1
    gender = 1 if user[-2].lower()=='male' else 0
    age = int(user[-1])
    X['gender'] = [gender]*len(items)
    X['age'] = [age]*len(items)
    items = np.array(items)
    
    response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                       ContentType='text/csv',
                                       Body=X.to_csv(header=False, index=False).encode("utf-8"))

    result = json.loads(response['Body'].read().decode())
    pred = np.array(result)[:,1]
    reco = {"recommend":list(items[np.argsort(pred)[::-1]])}
    return json.dumps(reco)

In [66]:
lambda_handler(None, None)

'{"recommend": ["4", "3", "1", "2", "5", "6"]}'