## AWS SageMakerを用いたゴミの分別モデル作成と評価

### データの準備

In [2]:
import html
import os

import boto3
from IPython.core.display import HTML
from sagemaker import get_execution_role

# Name of the S3 bucket the images are listed in and downloaded from.
bucket_name = "plism2022-sagemaker"
# Substring used to select S3 object keys, and the local root directory
# that is globbed for test images after the download.
image_dir_name = "ai-trash-classify-data"

In [3]:
# Region of the default boto3 session (not referenced by the cells visible here).
region = boto3.Session().region_name
s3_bucket = bucket_name

# Resource-level S3 handle and the Bucket object used to list and download objects.
s3 = boto3.resource("s3")
bucket = s3.Bucket(s3_bucket)

In [4]:
# Mirror the image tree from S3 into the working directory, reusing each
# object key as the local relative path.
image_suffixes = (".jpg", ".jpeg", ".png")

for s3_object in bucket.objects.all():
    key = s3_object.key
    if image_dir_name not in key:
        continue
    if key.endswith("/"):
        # "Folder" placeholder object: recreate the (possibly empty) directory.
        os.makedirs(key, exist_ok=True)
    elif key.lower().endswith(image_suffixes):
        # Match on the real file extension (case-insensitively) instead of a
        # substring anywhere in the key, and make sure the parent directory
        # exists first: the download does not create missing directories and
        # the listing is not guaranteed to contain a placeholder for them.
        parent_dir = os.path.dirname(key)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        bucket.download_file(key, key)
    # Any other object (non-image file) is skipped rather than being turned
    # into a directory named after the file.

In [5]:
import glob

# Read every test image into memory as raw bytes, keyed by its relative path.
images = {}
test_subdirs = sorted(glob.glob(image_dir_name + "/test/*"))
for test_subdir in test_subdirs:
    # Entries whose path contains "smjs" are excluded from the test set.
    if "smjs" in test_subdir:
        continue
    for image_path in sorted(glob.glob(test_subdir + "/*")):
        with open(image_path, "rb") as image_file:
            images[image_path] = image_file.read()

### モデルの準備

In [6]:
import json

def query_endpoint(img, endpoint_name='jumpstart-dft-tf-ic-imagenet-mobilenet-v2-100-224-clas', client=None):
    """Send one image to an inference endpoint and return the raw response.

    Args:
        img: Image payload sent as the request body with content type
            ``application/x-image`` (the notebook passes raw file bytes).
        endpoint_name: Name of the endpoint to invoke. Defaults to the
            endpoint this notebook was originally written against.
        client: Optional object exposing ``invoke_endpoint`` (for example a
            runtime client that the caller wants to reuse across many
            images). When omitted, a new ``runtime.sagemaker`` boto3 client
            is created for this call.

    Returns:
        Whatever ``client.invoke_endpoint`` returns, unmodified.
    """
    if client is None:
        client = boto3.client('runtime.sagemaker')
    return client.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType='application/x-image',
        Body=img,
        Accept='application/json;verbose',
    )
    

def parse_prediction(query_response):
    """Decode an endpoint response into its prediction fields.

    Args:
        query_response: Mapping whose ``'Body'`` entry has a ``read()``
            method returning a JSON document.

    Returns:
        A ``(predicted_label, probabilities, labels)`` tuple taken from the
        JSON keys of the same names.
    """
    payload = json.loads(query_response['Body'].read())
    wanted_keys = ('predicted_label', 'labels', 'probabilities')
    top_label, label_names, scores = (payload[key] for key in wanted_keys)
    return top_label, scores, label_names

### ゴミの分別予測

#### 単一ラベルの予測

In [7]:
# Show each test image next to the model's single predicted label.
for filename, img in images.items():
    query_response = query_endpoint(img)
    predicted_label, probabilities, labels = parse_prediction(query_response)
    # Quote and escape every interpolated value so that paths or labels
    # containing spaces, quotes, or markup characters cannot break the HTML.
    safe_path = html.escape(filename, quote=True)
    safe_label = html.escape(str(predicted_label))
    display(HTML(f'<img src="{safe_path}" alt="{safe_path}" align="left" style="width: 250px;"/>'
                 f'<figcaption>Predicted Label is : {safe_label},</figcaption>'
                 f'<figcaption>Image Path is : {safe_path}</figcaption>'))

#### 複数ラベルの予測

In [8]:
def predict_top_k_labels(probabilities, labels,k):
    """Return the ``k`` most probable labels as one comma-separated string.

    Args:
        probabilities: Sequence of scores, one per class.
        labels: Sequence of label strings, indexed like ``probabilities``.
        k: Number of labels to keep.

    Returns:
        The labels of the ``k`` highest scores, highest first, joined with
        ``","``. Equal scores keep their original relative order.
    """
    ranked = sorted(enumerate(probabilities), key=lambda pair: pair[1], reverse=True)
    best_positions = [position for position, _ in ranked[:k]]
    return ",".join(labels[position] for position in best_positions)

# Collect the top-5 labels for every test image (one row per image) and show
# each image together with those labels.
predicted_labels_list = []
for filename, img in images.items():
    # Split the relative path once. Component 1 is the folder directly under
    # the data root and component 2 is the sub-folder containing the image.
    path_parts = filename.split("/")
    dir_name, img_name = path_parts[1], path_parts[2]

    query_response = query_endpoint(img)
    predicted_label, probabilities, labels = parse_prediction(query_response)
    top5_class_labels = predict_top_k_labels(probabilities, labels, 5)

    # NOTE(review): splitting on "," assumes no label itself contains a
    # comma; confirm for this model's label set.
    predicted_labels = [dir_name, img_name, *top5_class_labels.split(",")]
    predicted_labels_list.append(predicted_labels)

    # Quote and escape every interpolated value so that paths or labels
    # containing spaces, quotes, or markup characters cannot break the HTML.
    safe_path = html.escape(filename, quote=True)
    safe_labels = html.escape(top5_class_labels)
    display(HTML(f'<img src="{safe_path}" alt="{safe_path}" align="left" style="width: 250px;"/>'
                 f'<figcaption>Top-5 model predictions are: {safe_labels}</figcaption>'
                 f'<figcaption>Image Path is : {safe_path}</figcaption>'))

### 予測結果の保存

In [9]:
import pandas as pd

In [10]:
# Output workbook; later cells read the predictions back from this file.
dataframe_name = "predict_result_finetuned_11_14.xlsx"
# Two path components followed by the five ranked labels stored per image.
column_name = ["dir_path", "img_name", "1st", "2nd", "3rd", "4th", "5th"]

# One row per image; the DataFrame index is not written to the sheet.
predicted_result_df = pd.DataFrame(predicted_labels_list, columns=column_name)
predicted_result_df.to_excel(dataframe_name, index=False)

### ImageNetラベルを日本語に変換

In [11]:
import pandas as pd
import label_translation_en_to_ja as lt

# Translator from the local label_translation_en_to_ja module; its convert()
# method is applied to every cell of the saved table further down.
# NOTE(review): presumably maps English ILSVRC2012 labels to Japanese — confirm.
translator = lt.Ilsvrc2012Japanese()

In [12]:
# Build a translated copy of the saved predictions. A cell is replaced only
# when the translator returns a truthy value for it; all other cells keep
# their original content.
en_labels_list = pd.read_excel(dataframe_name)
ja_labels_list = en_labels_list.copy()
# Enumerate rows so that .iat always receives a true positional index
# rather than the row's index label.
for row_pos, (_, row) in enumerate(en_labels_list.iterrows()):
    for col_pos, label in enumerate(row):
        # Translate once and reuse the result instead of converting twice.
        translated = translator.convert(label)
        if translated:
            ja_labels_list.iat[row_pos, col_pos] = translated

In [13]:
# Display the table with translated labels.
ja_labels_list

Unnamed: 0,dir_path,img_name,1st,2nd,3rd,4th,5th
0,test,001_aircap,布巾（お皿拭き）,財布,ノートパソコン,ハンカチ,蚊帳
1,test,002_riceball-packaging,パケット,ビニール袋,おむつ,封筒／エンベロープ,ペーパータオル
2,test,003_can,缶切り,電気なべ,オイルフィルター,妨げる,ゴミ捨て缶
3,test,003_can,テニスボール,ビアグラス,パケット,ビール瓶,ローション
4,test,003_can,ポップなボトル,ビール瓶,缶切り,コーヒーマグカップ,ライター
5,test,003_can,ビアグラス,コーヒーマグカップ,シェイカー,バレル,ゴミ捨て缶
6,test,004_tissue,白衣,トイレットペーパー,酸素マスク,フック,ビニール袋
7,test,005_plastic-bag,ビニール袋,おむつ,シャワーキャップ,郵袋,フープスカート
8,test,005_plastic-bag,ボンネット,ビニール袋,おむつ,シャワーキャップ,ハンカチ
9,test,006_petbottle,ポップなボトル,ウォーターボトル,ビール瓶,クリスマスの靴下,犬（フレンチ・ブルドッグ）


### 予測結果の評価

In [14]:
# Reload the saved predictions from the workbook for scoring.
result_df = pd.read_excel(dataframe_name)

In [15]:
# Count the rows in which one of the first `predict_nums` ranked labels is
# equal to the row's `img_name` value.
# NOTE(review): `img_name` is compared directly with model labels; confirm
# that the two use the same vocabulary.
predict_nums = 1 # 1~5
correct_num = 0

for _, row in result_df.iterrows():
    expected = row["img_name"]
    # Ranked label columns start at position 2.
    candidates = row.iloc[2:2+predict_nums]
    if any(candidate == expected for candidate in candidates):
        correct_num += 1

In [16]:
# Fraction of rows counted as correct for the chosen `predict_nums`.
total_rows = len(result_df)
# An empty result table has no defined accuracy; report NaN instead of
# raising ZeroDivisionError.
accuracy = correct_num / total_rows if total_rows else float("nan")
print("Top{}-Accuracy = {}".format(predict_nums, accuracy))

Top1-Accuray = 0.0
