### 各要素（車番、分類番号、地域名、ひらがな）の識別結果をマージし、プレート全体での正答率を算出

In [1]:
import pandas as pd
import numpy as np

import os
import glob
from tqdm import tqdm

import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['font.family'] = 'Arial Unicode MS'
import seaborn as sns; sns.set_palette('husl')

from PIL import Image
from IPython.display import display

### 個々の評価結果をマージ

In [2]:
# Load the four per-element recognition result tables (plate number, classification
# number, region name, hiragana). All files are read as UTF-8 with BOM.
(
    df_large_num_result,
    df_small_num_result,
    df_region_result,
    df_hiragana_result,
) = (
    pd.read_csv(csv_path, encoding="utf-8-sig")
    for csv_path in (
        "./results/1106/large_num_result.csv",
        "./results/1106/small_num_result.csv",
        "./results/1106/region_result.csv",
        "./results/1106/hiragana_result.csv",
    )
)

FileNotFoundError: [Errno 2] File b'./results/1106/large_num_result.csv' does not exist: b'./results/1106/large_num_result.csv'

In [None]:
# Join the four result tables on the image name so that each row carries the
# prediction / ground truth of every plate element (default inner join).
result_merged = (
    df_large_num_result
    .merge(df_small_num_result, on="img_name")
    .merge(df_region_result, on="img_name")
    .merge(df_hiragana_result, on="img_name")
)

In [None]:
# Exclude rows whose classification-number ground truth is 30 or 31.
exclude_label = ["30", "31"]
# Compare on the string form: read_csv parses an all-numeric column as integers,
# and Series.isin() never matches the string "30" against the integer 30, which
# would silently leave these rows in the evaluation set.
# NOTE(review): a float column would render as "30.0" and still not match — confirm the dtype.
is_excluded = result_merged["true_label_small"].astype(str).isin(exclude_label)
result_merged = result_merged[~is_excluded]

In [None]:
# Preview the first rows of the merged per-element results.
result_merged.head()

## 評価実施

### validデータ全体での評価

In [None]:
# Count the plates for which all four elements (plate number, classification
# number, region name, hiragana) were predicted correctly.
# The comparison is done column-wise instead of row by row with iterrows().
is_full_match = (
    (result_merged["pred_label_large"] == result_merged["true_label_large"])
    & (result_merged["pred_label_small"] == result_merged["true_label_small"])
    & (result_merged["pred_label_region"] == result_merged["true_label_region"])
    & (result_merged["pred_label_hiragana"] == result_merged["true_label_hiragana"])
)
true_cnt = int(is_full_match.sum())

In [None]:
# Report the share of plates whose every element matched, as a percentage.
total_rows = len(result_merged)
full_match_pct = true_cnt / total_rows * 100
print("全桁一致率:{:.2f}%".format(full_match_pct))

### 店舗、サイズ情報を含むCSVファイルと識別結果をマージ

In [None]:
# Adjust the "../" prefix to wherever the 1分補正 format files were downloaded.
df_np_detect_1, df_np_detect_2, df_np_detect_3 = (
    pd.read_csv(csv_path, encoding="utf-8-sig")
    for csv_path in (
        "../../../../../Downloads/1分補正_プレートのサイズ付与済みデータ.csv",
        "../../../../../Downloads/1分補正_プレートのサイズ付与済みデータ_8月.csv",
        "../../../../../Downloads/1分補正_プレートのサイズ付与済みデータ_9月後半.csv",
    )
)

# Stack the three tables vertically and renumber the rows from 0.
df_np_detect = pd.concat(
    [df_np_detect_1, df_np_detect_2, df_np_detect_3],
    ignore_index=True,
)

In [None]:
# Preview the first rows of the concatenated store / plate-size table.
df_np_detect.head()

In [None]:
# Drop the original-image-path column and rename the image-name column to
# "img_name" so it can be used as the join key with the recognition results.
# errors="ignore" keeps this cell re-runnable: on a second execution the column
# is already gone and a plain drop() would raise KeyError.
df_np_detect = (
    df_np_detect
    .drop(columns=["画像の元パス"], errors="ignore")
    .rename(columns={"画像名": "img_name"})
)

In [None]:
# As above, adjust the path to wherever img_path_master.csv was downloaded.
df_img_path = pd.read_csv("../../../../../Downloads/img_path_master.csv", encoding="utf-8-sig")

In [None]:
# Attach the image path to each detection row (inner join on the image name).
# NOTE(review): this overwrites df_np_detect, so re-running the cell merges a second
# time and pandas will suffix the duplicated img_path column — run it once per kernel.
img_path_lookup = df_img_path[["img_path", "img_name"]]
df_np_detect = df_np_detect.merge(img_path_lookup, on="img_name", how="inner")

In [None]:
# Keep only the detection rows whose image appears in the evaluated result set.
valid_imgs = result_merged["img_name"].drop_duplicates().tolist()
is_valid_img = df_np_detect["img_name"].isin(valid_imgs)
df_target_np = df_np_detect[is_valid_img]

In [None]:
# Combine recognition results with the store / plate-size information per image.
result_merged_all = result_merged.merge(df_target_np, on="img_name", how="inner")

In [None]:
# Preview the first rows of the results joined with store information.
result_merged_all.head()

In [None]:
# Check how the validation samples are distributed over stores (20 most frequent).
data_counts = result_merged_all['物件名'].value_counts().head(20)
fig, ax = plt.subplots(figsize=(5, 8))
sns.barplot(x=data_counts.values, y=data_counts.index, ax=ax)
ax.set_title('店舗別データの分布', fontsize=15)
plt.show()

### 店舗毎の精度を確認

In [None]:
# Names of the 20 stores with the most samples, most frequent first.
shop_list = result_merged_all['物件名'].value_counts().head(20).index.tolist()

In [None]:
# Per-store accuracy for the whole plate and for each plate element.
# Summary column name -> (prediction column, ground-truth column). The insertion
# order fixes the column order of the summary table built from data_list.
element_columns = {
    "車番一致": ("pred_label_large", "true_label_large"),
    "分類番号一致": ("pred_label_small", "true_label_small"),
    "ひらがな一致": ("pred_label_hiragana", "true_label_hiragana"),
    "地域名一致": ("pred_label_region", "true_label_region"),
}

data_list = []

for each_shop in shop_list:
    df_target = result_merged_all[result_merged_all["物件名"] == each_shop]
    n_rows = len(df_target)

    # One boolean column per element: True where the prediction equals the ground truth.
    # This replaces five hand-maintained counters updated inside an iterrows() loop.
    element_hits = pd.DataFrame(
        {
            name: df_target[pred_col] == df_target[true_col]
            for name, (pred_col, true_col) in element_columns.items()
        }
    )
    # A plate is fully correct only when every element matches.
    true_cnt = int(element_hits.all(axis=1).sum())

    print("店舗:{}, 全桁一致率:{:.2f}%".format(each_shop, true_cnt / n_rows * 100))

    data_dict = {"店舗": each_shop, "全桁一致": true_cnt / n_rows * 100}
    for name in element_columns:
        data_dict[name] = int(element_hits[name].sum()) / n_rows * 100
    data_list.append(data_dict)

In [None]:
# Turn the per-store records into a table; the column order follows the keys of the first record.
summary_columns = list(data_list[0])
df_ = pd.DataFrame(data_list, columns=summary_columns)

In [None]:
# Use the store name as the row index of the summary table.
# The guard keeps the cell re-runnable: once "店舗" has become the index it is no
# longer a column, and an unconditional set_index would raise KeyError.
if "店舗" in df_.columns:
    df_ = df_.set_index("店舗")

In [None]:
# Heatmap of the per-store accuracies (rows: stores, columns: metrics, values in percent).
fig, ax = plt.subplots(figsize=(12, 10))

# fmt=".1f": seaborn's default annotation format ".2g" keeps only two significant
# digits, so 100.0 is drawn as "1e+02" and 95.3 as "95".
sns.heatmap(df_, annot=True, fmt=".1f", cmap='YlOrBr', ax=ax)
plt.show()

In [None]:
# Accuracy of the plate number (車番) alone, per store.
for each_shop in shop_list:
    df_target = result_merged_all[result_merged_all["物件名"] == each_shop]

    # Column-wise comparison instead of counting row by row with iterrows().
    true_cnt = int((df_target["pred_label_large"] == df_target["true_label_large"]).sum())

    print("店舗:{}, 車番認識率:{:.2f}%".format(each_shop, true_cnt / len(df_target) * 100))

In [None]:
# Accuracy of the classification number (分類番号) alone, per store.
for each_shop in shop_list:
    df_target = result_merged_all[result_merged_all["物件名"] == each_shop]

    # Column-wise comparison instead of counting row by row with iterrows().
    true_cnt = int((df_target["pred_label_small"] == df_target["true_label_small"]).sum())

    print("店舗:{}, 分番認識率:{:.2f}%".format(each_shop, true_cnt / len(df_target) * 100))

In [None]:
# Accuracy of the region name (地域名) alone, per store.
for each_shop in shop_list:
    df_target = result_merged_all[result_merged_all["物件名"] == each_shop]

    # Column-wise comparison instead of counting row by row with iterrows().
    true_cnt = int((df_target["pred_label_region"] == df_target["true_label_region"]).sum())

    print("店舗:{}, 地名認識率:{:.2f}%".format(each_shop, true_cnt / len(df_target) * 100))

In [None]:
#イオン幕張の不正解データ
#df_target = result_merged_all[result_merged_all["物件名"] =='イオン幕張']
#true_img_path = []
#false_img_path = []

#for idx_, row_ in df_target.iterrows():
#    if (row_["pred_label_large"] == row_["true_label_large"]) & (row_["pred_label_small"] == row_["true_label_small"]) & (row_["pred_label_region"] == row_["true_label_region"]) & (row_["pred_label_hiragana"] == row_["true_label_hiragana"]):
#        true_img_path.append(row_["img_path"])
#    else:
#        false_img_path.append(row_["img_path"])

In [None]:
#for each_img in false_img_path:
#    df_ =df_target[df_target["img_path"] == each_img]
#    title_ = "正解：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(df_.iloc[0]["true_label_large"], df_.iloc[0]["true_label_small"], df_.iloc[0]["true_label_region"], df_.iloc[0]["true_label_hiragana"])
#    title_ = title_ + "   予測：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(df_.iloc[0]["pred_label_large"], df_.iloc[0]["pred_label_small"], df_.iloc[0]["pred_label_region"], df_.iloc[0]["pred_label_hiragana"])
#    display(title_)
#    display(Image.open("../../../../../Downloads/アーカイブ"+each_img.split('images')[1]))

In [None]:
#イオン相模原ショッピングセンターの不正解データ
#df_target = result_merged_all[result_merged_all["物件名"] =='イオン相模原ショッピングセンター']
#true_img_path = []
#false_img_path = []

#for idx_, row_ in df_target.iterrows():
#    if (row_["pred_label_large"] == row_["true_label_large"]) & (row_["pred_label_small"] == row_["true_label_small"]) & (row_["pred_label_region"] == row_["true_label_region"]) & (row_["pred_label_hiragana"] == row_["true_label_hiragana"]):
#        true_img_path.append(row_["img_path"])
#    else:
#        false_img_path.append(row_["img_path"])

In [None]:
#for each_img in false_img_path:
#    df_ =df_target[df_target["img_path"] == each_img]
#    title_ = "正解：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(df_.iloc[0]["true_label_large"], df_.iloc[0]["true_label_small"], df_.iloc[0]["true_label_region"], df_.iloc[0]["true_label_hiragana"])
#    title_ = title_ + "   予測：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(df_.iloc[0]["pred_label_large"], df_.iloc[0]["pred_label_small"], df_.iloc[0]["pred_label_region"], df_.iloc[0]["pred_label_hiragana"])
#    display(title_)
#    display(Image.open("../../../../../Downloads/アーカイブ"+each_img.split('images')[1]))

In [None]:
#イオン金沢八景の不正解データ
#df_target = result_merged_all[result_merged_all["物件名"] =='イオン金沢八景']
#true_img_path = []
#false_img_path = []

#for idx_, row_ in df_target.iterrows():
#    if (row_["pred_label_large"] == row_["true_label_large"]) & (row_["pred_label_small"] == row_["true_label_small"]) & (row_["pred_label_region"] == row_["true_label_region"]) & (row_["pred_label_hiragana"] == row_["true_label_hiragana"]):
#        true_img_path.append(row_["img_path"])
#    else:
#        false_img_path.append(row_["img_path"])

In [None]:
#for each_img in false_img_path:
#    df_ =df_target[df_target["img_path"] == each_img]
#    title_ = "正解：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(df_.iloc[0]["true_label_large"], df_.iloc[0]["true_label_small"], df_.iloc[0]["true_label_region"], df_.iloc[0]["true_label_hiragana"])
#    title_ = title_ + "   予測：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(df_.iloc[0]["pred_label_large"], df_.iloc[0]["pred_label_small"], df_.iloc[0]["pred_label_region"], df_.iloc[0]["pred_label_hiragana"])
#    display(title_)
#    display(Image.open("../../../../../Downloads/アーカイブ"+each_img.split('images')[1]))

In [None]:
# Misclassified samples for イオン東雲店.
# It would be good to run this one too, but it is already partly done, so it can wait.
# Skipping it is fine if there is no time.
df_target = result_merged_all[result_merged_all["物件名"] == 'イオン東雲店']

# True where all four plate elements were predicted correctly; computed column-wise
# instead of looping over rows with iterrows().
is_all_correct = (
    (df_target["pred_label_large"] == df_target["true_label_large"])
    & (df_target["pred_label_small"] == df_target["true_label_small"])
    & (df_target["pred_label_region"] == df_target["true_label_region"])
    & (df_target["pred_label_hiragana"] == df_target["true_label_hiragana"])
)
# Image paths of fully correct / not fully correct plates, in row order.
true_img_path = df_target.loc[is_all_correct, "img_path"].tolist()
false_img_path = df_target.loc[~is_all_correct, "img_path"].tolist()

In [None]:
# Show every misclassified sample of the selected store: ground truth and prediction
# as a caption, followed by the plate image.
true_label_cols = ["true_label_large", "true_label_small", "true_label_region", "true_label_hiragana"]
pred_label_cols = ["pred_label_large", "pred_label_small", "pred_label_region", "pred_label_hiragana"]

for wrong_path in false_img_path:
    # First row recorded for this image path.
    sample = df_target[df_target["img_path"] == wrong_path].iloc[0]
    caption = (
        "正解：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(*sample[true_label_cols])
        + "   予測：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(*sample[pred_label_cols])
    )
    display(caption)
    # Keep the part of the stored path after 'images' and resolve it under the local archive folder.
    path_tail = wrong_path.split('images')[1]
    display(Image.open("../../../../../Downloads/アーカイブ" + path_tail))

In [None]:
# Misclassified samples for イオン稲毛.
# Please run the per-store cells starting from here.
df_target = result_merged_all[result_merged_all["物件名"] == 'イオン稲毛']

# True where all four plate elements were predicted correctly; computed column-wise
# instead of looping over rows with iterrows().
is_all_correct = (
    (df_target["pred_label_large"] == df_target["true_label_large"])
    & (df_target["pred_label_small"] == df_target["true_label_small"])
    & (df_target["pred_label_region"] == df_target["true_label_region"])
    & (df_target["pred_label_hiragana"] == df_target["true_label_hiragana"])
)
# Image paths of fully correct / not fully correct plates, in row order.
true_img_path = df_target.loc[is_all_correct, "img_path"].tolist()
false_img_path = df_target.loc[~is_all_correct, "img_path"].tolist()

In [None]:
# Please capture all displayed captions and images as screenshots.
# One screenshot per sample or one covering several samples are both fine.
# As before, change the path to wherever the アーカイブ folder was downloaded.
true_label_cols = ["true_label_large", "true_label_small", "true_label_region", "true_label_hiragana"]
pred_label_cols = ["pred_label_large", "pred_label_small", "pred_label_region", "pred_label_hiragana"]

for wrong_path in false_img_path:
    # First row recorded for this image path.
    sample = df_target[df_target["img_path"] == wrong_path].iloc[0]
    caption = (
        "正解：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(*sample[true_label_cols])
        + "   予測：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(*sample[pred_label_cols])
    )
    display(caption)
    # Keep the part of the stored path after 'images' and resolve it under the local archive folder.
    path_tail = wrong_path.split('images')[1]
    display(Image.open("../../../../../Downloads/アーカイブ" + path_tail))

In [None]:
# Misclassified samples for イオンスタイル仙台卸町店.
df_target = result_merged_all[result_merged_all["物件名"] == 'イオンスタイル仙台卸町店']

# True where all four plate elements were predicted correctly; computed column-wise
# instead of looping over rows with iterrows().
is_all_correct = (
    (df_target["pred_label_large"] == df_target["true_label_large"])
    & (df_target["pred_label_small"] == df_target["true_label_small"])
    & (df_target["pred_label_region"] == df_target["true_label_region"])
    & (df_target["pred_label_hiragana"] == df_target["true_label_hiragana"])
)
# Image paths of fully correct / not fully correct plates, in row order.
true_img_path = df_target.loc[is_all_correct, "img_path"].tolist()
false_img_path = df_target.loc[~is_all_correct, "img_path"].tolist()

In [None]:
# Show every misclassified sample of the selected store: ground truth and prediction
# as a caption, followed by the plate image.
for each_img in false_img_path:
    # false_img_path holds "img_path" values, so the lookup must use that column.
    # Filtering on "img_name" selected no rows and made iloc[0] raise IndexError.
    df_ = df_target[df_target["img_path"] == each_img]
    first_row = df_.iloc[0]
    title_ = "正解：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(first_row["true_label_large"], first_row["true_label_small"], first_row["true_label_region"], first_row["true_label_hiragana"])
    title_ = title_ + "   予測：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(first_row["pred_label_large"], first_row["pred_label_small"], first_row["pred_label_region"], first_row["pred_label_hiragana"])
    display(title_)
    # Keep the part of the stored path after 'images' and resolve it under the local archive folder.
    display(Image.open("../../../../../Downloads/アーカイブ"+each_img.split('images')[1]))

In [None]:
# Misclassified samples for イオン板橋ショッピングセンター.
df_target = result_merged_all[result_merged_all["物件名"] == 'イオン板橋ショッピングセンター']

# True where all four plate elements were predicted correctly; computed column-wise
# instead of looping over rows with iterrows().
is_all_correct = (
    (df_target["pred_label_large"] == df_target["true_label_large"])
    & (df_target["pred_label_small"] == df_target["true_label_small"])
    & (df_target["pred_label_region"] == df_target["true_label_region"])
    & (df_target["pred_label_hiragana"] == df_target["true_label_hiragana"])
)
# Image paths of fully correct / not fully correct plates, in row order.
true_img_path = df_target.loc[is_all_correct, "img_path"].tolist()
false_img_path = df_target.loc[~is_all_correct, "img_path"].tolist()

In [None]:
# Show every misclassified sample of the selected store: ground truth and prediction
# as a caption, followed by the plate image.
true_label_cols = ["true_label_large", "true_label_small", "true_label_region", "true_label_hiragana"]
pred_label_cols = ["pred_label_large", "pred_label_small", "pred_label_region", "pred_label_hiragana"]

for wrong_path in false_img_path:
    # First row recorded for this image path.
    sample = df_target[df_target["img_path"] == wrong_path].iloc[0]
    caption = (
        "正解：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(*sample[true_label_cols])
        + "   予測：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(*sample[pred_label_cols])
    )
    display(caption)
    # Keep the part of the stored path after 'images' and resolve it under the local archive folder.
    path_tail = wrong_path.split('images')[1]
    display(Image.open("../../../../../Downloads/アーカイブ" + path_tail))

In [None]:
# Misclassified samples for 関西スーパー京阪大和田１F.
df_target = result_merged_all[result_merged_all["物件名"] == '関西スーパー京阪大和田１F']

# True where all four plate elements were predicted correctly; computed column-wise
# instead of looping over rows with iterrows().
is_all_correct = (
    (df_target["pred_label_large"] == df_target["true_label_large"])
    & (df_target["pred_label_small"] == df_target["true_label_small"])
    & (df_target["pred_label_region"] == df_target["true_label_region"])
    & (df_target["pred_label_hiragana"] == df_target["true_label_hiragana"])
)
# Image paths of fully correct / not fully correct plates, in row order.
true_img_path = df_target.loc[is_all_correct, "img_path"].tolist()
false_img_path = df_target.loc[~is_all_correct, "img_path"].tolist()

In [None]:
# Show every misclassified sample of the selected store: ground truth and prediction
# as a caption, followed by the plate image.
true_label_cols = ["true_label_large", "true_label_small", "true_label_region", "true_label_hiragana"]
pred_label_cols = ["pred_label_large", "pred_label_small", "pred_label_region", "pred_label_hiragana"]

for wrong_path in false_img_path:
    # First row recorded for this image path.
    sample = df_target[df_target["img_path"] == wrong_path].iloc[0]
    caption = (
        "正解：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(*sample[true_label_cols])
        + "   予測：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(*sample[pred_label_cols])
    )
    display(caption)
    # Keep the part of the stored path after 'images' and resolve it under the local archive folder.
    path_tail = wrong_path.split('images')[1]
    display(Image.open("../../../../../Downloads/アーカイブ" + path_tail))

In [None]:
# Misclassified samples for マミーマート船橋日大[地上].
df_target = result_merged_all[result_merged_all["物件名"] == 'マミーマート船橋日大[地上]']

# True where all four plate elements were predicted correctly; computed column-wise
# instead of looping over rows with iterrows().
is_all_correct = (
    (df_target["pred_label_large"] == df_target["true_label_large"])
    & (df_target["pred_label_small"] == df_target["true_label_small"])
    & (df_target["pred_label_region"] == df_target["true_label_region"])
    & (df_target["pred_label_hiragana"] == df_target["true_label_hiragana"])
)
# Image paths of fully correct / not fully correct plates, in row order.
true_img_path = df_target.loc[is_all_correct, "img_path"].tolist()
false_img_path = df_target.loc[~is_all_correct, "img_path"].tolist()

In [None]:
# Show every misclassified sample of the selected store: ground truth and prediction
# as a caption, followed by the plate image.
true_label_cols = ["true_label_large", "true_label_small", "true_label_region", "true_label_hiragana"]
pred_label_cols = ["pred_label_large", "pred_label_small", "pred_label_region", "pred_label_hiragana"]

for wrong_path in false_img_path:
    # First row recorded for this image path.
    sample = df_target[df_target["img_path"] == wrong_path].iloc[0]
    caption = (
        "正解：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(*sample[true_label_cols])
        + "   予測：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(*sample[pred_label_cols])
    )
    display(caption)
    # Keep the part of the stored path after 'images' and resolve it under the local archive folder.
    path_tail = wrong_path.split('images')[1]
    display(Image.open("../../../../../Downloads/アーカイブ" + path_tail))

In [None]:
# Misclassified samples for イオン高根木戸.
df_target = result_merged_all[result_merged_all["物件名"] == 'イオン高根木戸']

# True where all four plate elements were predicted correctly; computed column-wise
# instead of looping over rows with iterrows().
is_all_correct = (
    (df_target["pred_label_large"] == df_target["true_label_large"])
    & (df_target["pred_label_small"] == df_target["true_label_small"])
    & (df_target["pred_label_region"] == df_target["true_label_region"])
    & (df_target["pred_label_hiragana"] == df_target["true_label_hiragana"])
)
# Image paths of fully correct / not fully correct plates, in row order.
true_img_path = df_target.loc[is_all_correct, "img_path"].tolist()
false_img_path = df_target.loc[~is_all_correct, "img_path"].tolist()

In [None]:
# Show every misclassified sample of the selected store: ground truth and prediction
# as a caption, followed by the plate image.
true_label_cols = ["true_label_large", "true_label_small", "true_label_region", "true_label_hiragana"]
pred_label_cols = ["pred_label_large", "pred_label_small", "pred_label_region", "pred_label_hiragana"]

for wrong_path in false_img_path:
    # First row recorded for this image path.
    sample = df_target[df_target["img_path"] == wrong_path].iloc[0]
    caption = (
        "正解：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(*sample[true_label_cols])
        + "   予測：車番:{}, 分類番号:{}, 地域名:{}, ひらがな:{}".format(*sample[pred_label_cols])
    )
    display(caption)
    # Keep the part of the stored path after 'images' and resolve it under the local archive folder.
    path_tail = wrong_path.split('images')[1]
    display(Image.open("../../../../../Downloads/アーカイブ" + path_tail))