<h1>Crop Data</h1>

In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import preprocess_input, decode_predictions

In [2]:
#Make the destination folder for cropped images ("dev_images").
#exist_ok=True lets this cell be re-run without failing when the folder is already there.
os.makedirs("dev_images", exist_ok=True)

In [3]:
#Load food_cleaned.csv from the parent directory into a DataFrame
food_cleaned_path = "../food_cleaned.csv"
food_cleaned = pd.read_csv(food_cleaned_path)

In [4]:
#Keep only pictures with exactly 1 detected object, a bounding box ratio of at least 0.25,
#a non-negative aesthetic_score, and real_x1 / real_y1 >= 0
selected_box_ratio = 0.25
aesthetic_score_min = 0
has_single_object = food_cleaned['number_of_object'] == 1
box_is_large_enough = food_cleaned['bbox_ratio'] >= selected_box_ratio
score_is_high_enough = food_cleaned['aesthetic_score'] >= aesthetic_score_min
x1_in_picture = food_cleaned['real_x1'] >= 0
y1_in_picture = food_cleaned['real_y1'] >= 0
food_cleaned = food_cleaned[
    has_single_object
    & box_is_large_enough
    & score_is_high_enough
    & x1_in_picture
    & y1_in_picture
]

In [5]:
#Renumber the rows 0..n-1 after filtering; the old index is discarded rather than kept as a column
food_cleaned = food_cleaned.reset_index(drop=True)

In [6]:
#Display the filtered table to inspect the rows that remain
food_cleaned

Unnamed: 0,aesthetic_score,photo_eid,pic_url,product_id,product_name,res_id,res_name,number_of_object,bbox_ratio,real_x1,real_x2,real_y1,real_y2
0,0.617797,406260216c71469e834a5635fec6bac3,https://img.wongnai.com/p/1920x0/2016/02/16/40...,1181038.0,ส้มตำ,215872.0,Isaan and Brew,1.0,0.270742,399.0,871.0,181.0,581.0
1,0.598124,c58c86dad8d24fffaa461efbb5286620,https://img.wongnai.com/p/1920x0/2017/11/09/c5...,1668755.0,ส้มตำ,298416.0,ล้านต๋ำส้ม,1.0,0.641056,153.0,1691.0,153.0,1307.0
2,0.596057,e70383a17c85421581cd5e0aa9dcef1b,https://img.wongnai.com/p/1920x0/2018/10/11/e7...,10271622.0,ส้มตำ,303852.0,หมูปลาร้าสายซิ่ง,1.0,0.437967,270.0,1689.0,270.0,1013.0
3,0.591579,d6f00625c28645d98d0bbaee7a9d9608,https://img.wongnai.com/p/1920x0/2017/05/30/d6...,553634.0,ส้มตำ,9882.0,เป็นลาว,1.0,0.454240,85.0,653.0,113.0,454.0
4,0.591075,8f8443b79f6c4225b686c8fdca1a5a6f,https://img.wongnai.com/p/1920x0/2017/10/12/8f...,1532062.0,ส้มตำ,289648.0,แซ่บซี่ by อุดร,1.0,0.354295,540.0,1687.0,337.0,1080.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
46717,0.042405,59fa9ae3f98e40249d3926f5edc0bd32,https://img.wongnai.com/p/1920x0/2017/12/16/59...,1726841.0,ไส้อั่ว,219193.0,ไก่ทอดจอย ข้างสถานีรถไฟเชียงใหม่,1.0,0.735833,38.0,921.0,76.0,652.0
46718,0.041342,2cbc74c4fc474b5d9b58151ea5546b27,https://img.wongnai.com/p/1920x0/2016/12/29/2c...,1338636.0,ไส้อั่ว,260435.0,ไส้อั่ว อุ้ยเพชร,1.0,0.942844,153.0,1459.0,0.0,1996.0
46719,0.137491,5b403c49e20542209463a53a466b0464,https://img.wongnai.com/p/1920x0/2016/04/06/5b...,2798080.0,ข้าวหน้าเนื้อ,11728.0,Yoshinoya,1.0,0.737438,81.0,1024.0,81.0,696.0
46720,0.136769,5649ea59c6e34904b1f78fa0aa8b567e,https://img.wongnai.com/p/1920x0/2017/10/17/56...,8413072.0,ข้าวหน้าเนื้อ,236814.0,Oishi Ramen โออิชิราเมน,1.0,0.688656,153.0,1612.0,230.0,1535.0


In [7]:
#Copy the columns the crop worker needs into plain Python lists for use with multiprocessing
photo_eids = food_cleaned['photo_eid'].tolist()
real_y1s = food_cleaned['real_y1'].tolist()
real_y2s = food_cleaned['real_y2'].tolist()
real_x1s = food_cleaned['real_x1'].tolist()
real_x2s = food_cleaned['real_x2'].tolist()

In [8]:
#Worker function for multiprocessing: crop one image to its bounding box and save it
def crop_image_parallel(i):
    """Crop the i-th selected picture to its bounding box and save it under dev_images/.

    Position i is looked up in the module-level lists photo_eids, real_y1s,
    real_y2s, real_x1s and real_x2s. The picture is loaded from
    ../wongnai_images/<photo_eid>.jpg, converted to an array and divided by 255
    before it is sliced. If a ValueError is raised while cropping or saving,
    the uncropped picture is saved under the same name instead.
    """
    file_name = str(photo_eids[i]) + '.jpg'
    loaded = image.load_img("../wongnai_images/" + file_name)
    full_picture = image.img_to_array(loaded) / 255
    destination = 'dev_images/' + file_name
    try:
        top, bottom = int(real_y1s[i]), int(real_y2s[i])
        left, right = int(real_x1s[i]), int(real_x2s[i])
        matplotlib.image.imsave(destination, full_picture[top:bottom, left:right, :])
    except ValueError:
        #Fall back to the whole picture when the crop cannot be produced or saved
        matplotlib.image.imsave(destination, full_picture)

In [9]:
#Crop and save every selected picture using a pool of worker processes
import multiprocessing, functools
num_processors = 8
n_pictures = len(food_cleaned)
with multiprocessing.Pool(num_processors) as pool:
    #imap yields lazily, so list() drains it while tqdm reports progress
    progress = tqdm(pool.imap(crop_image_parallel, list(range(n_pictures))), total = n_pictures)
    list(progress)

100%|██████████| 46722/46722 [17:27<00:00, 44.62it/s]


In [10]:
# #Sequential fallback for Windows machines that cannot use multiprocessing
# def crop_image(food_cleaned):
#     for i in tqdm(range(len(food_cleaned))):
#         img_path =  "../wongnai_images/" + str(food_cleaned['photo_eid'][i]) + '.jpg'
#         img = image.load_img(img_path)
#         array_img = image.img_to_array(img)/255
#         try:
#             pic = array_img[int(food_cleaned['real_y1'][i]) : int(food_cleaned['real_y2'][i]) , int(food_cleaned['real_x1'][i]) : int(food_cleaned['real_x2'][i]), :]
#             matplotlib.image.imsave('dev_images/' + str(food_cleaned['photo_eid'][i]) + '.jpg', pic)
#         except ValueError:
#             matplotlib.image.imsave('dev_images/' + str(food_cleaned['photo_eid'][i]) + '.jpg', array_img)

<h1> Clean Data </h1>

In [11]:
#Print every distinct product_name in sorted order
product_names = food_cleaned['product_name'].unique()
print(np.sort(product_names))

['Pizza' 'Salmon Sashimi' 'honey toast' 'กระเพาะปลา' 'กุ้งอบวุ้นเส้น'
 'กุ้งเผา' 'กุ้งแช่น้ำปลา' 'ก๋วยจั๊บ' 'ก๋วยจั๊บญวน' 'ก๋วยเตี๋ยวคั่วไก่'
 'ก๋วยเตี๋ยวต้มยำ' 'ก๋วยเตี๋ยวเรือ' 'ขนมจีน' 'ขนมจีบ' 'ขนมถ้วย' 'ขนมปัง'
 'ขนมปังปิ้ง' 'ขนมปังสังขยา' 'ขาหมูเยอรมัน' 'ข้าวขาหมู' 'ข้าวคลุกกะปิ'
 'ข้าวซอยไก่' 'ข้าวผัด' 'ข้าวผัดกระเทียม' 'ข้าวมันไก่' 'ข้าวหน้าเนื้อ'
 'ข้าวหน้าเป็ด' 'ข้าวหมกไก่' 'ข้าวหมูกรอบ' 'ข้าวหมูแดง' 'ข้าวเหนียวมะม่วง'
 'คอหมูย่าง' 'ชาบู' 'ซุปเห็ด' 'ซูชิ' 'ตับหวาน' 'ติ่มซำ' 'ต้มยำ'
 'ต้มเลือดหมู' 'ต้มแซ่บกระดูกอ่อน' 'ทอดมันกุ้ง' 'ทอดมันปลากราย'
 'ทาโกะยากิ' 'น้ำตกหมู' 'น้ำพริกไข่ปู' 'บะหมี่แห้ง' 'ปลากระพงทอดน้ำปลา'
 'ปลากระพงนึ่งมะนาว' 'ปลากะพงทอดน้ำปลา' 'ปลาหมึกผัดไข่เค็ม' 'ปอเปี๊ยะทอด'
 'ปูนิ่มทอดกระเทียม' 'ปูผัดผงกะหรี่' 'ปูม้านึ่ง' 'ผักโขมอบชีส'
 'ผัดไทกุ้งสด' 'ยำถั่วพลู' 'ยำปลาดุกฟู' 'ยำวุ้นเส้น' 'ยำสาหร่าย' 'ยำหมูยอ'
 'ยำแซลมอน' 'ลาบ' 'สปาเก็ตตี้ขี้เมาทะเล' 'สปาเก็ตตี้คาโบนาร่า' 'สลัด'
 'สเต็กหมู' 'ส้มตำ' 'หมูกรอบ' 'หมูมะนาว' 'หมูสะเต๊ะ' 'หมูแดดเดียว'
 'หอยนางรม' 'หอยแคร

In [12]:
#Merge the two spelling variants of this dish into a single class.
#The nested dict limits the replacement to the product_name column, so an identical
#string in any other column is left untouched.
food_cleaned = food_cleaned.replace({'product_name': {'ปลากะพงทอดน้ำปลา': 'ปลากระพงทอดน้ำปลา'}})

In [13]:
#Drop the 'ซูชิ' class: it is not a clean class because it mixes sashimi, maki, sushi, roll, etc.
is_not_sushi = food_cleaned['product_name'].ne('ซูชิ')
food_cleaned = food_cleaned.loc[is_not_sushi]

In [14]:
#Display the table after merging the duplicated class and removing 'ซูชิ'
food_cleaned

Unnamed: 0,aesthetic_score,photo_eid,pic_url,product_id,product_name,res_id,res_name,number_of_object,bbox_ratio,real_x1,real_x2,real_y1,real_y2
0,0.617797,406260216c71469e834a5635fec6bac3,https://img.wongnai.com/p/1920x0/2016/02/16/40...,1181038.0,ส้มตำ,215872.0,Isaan and Brew,1.0,0.270742,399.0,871.0,181.0,581.0
1,0.598124,c58c86dad8d24fffaa461efbb5286620,https://img.wongnai.com/p/1920x0/2017/11/09/c5...,1668755.0,ส้มตำ,298416.0,ล้านต๋ำส้ม,1.0,0.641056,153.0,1691.0,153.0,1307.0
2,0.596057,e70383a17c85421581cd5e0aa9dcef1b,https://img.wongnai.com/p/1920x0/2018/10/11/e7...,10271622.0,ส้มตำ,303852.0,หมูปลาร้าสายซิ่ง,1.0,0.437967,270.0,1689.0,270.0,1013.0
3,0.591579,d6f00625c28645d98d0bbaee7a9d9608,https://img.wongnai.com/p/1920x0/2017/05/30/d6...,553634.0,ส้มตำ,9882.0,เป็นลาว,1.0,0.454240,85.0,653.0,113.0,454.0
4,0.591075,8f8443b79f6c4225b686c8fdca1a5a6f,https://img.wongnai.com/p/1920x0/2017/10/12/8f...,1532062.0,ส้มตำ,289648.0,แซ่บซี่ by อุดร,1.0,0.354295,540.0,1687.0,337.0,1080.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
46717,0.042405,59fa9ae3f98e40249d3926f5edc0bd32,https://img.wongnai.com/p/1920x0/2017/12/16/59...,1726841.0,ไส้อั่ว,219193.0,ไก่ทอดจอย ข้างสถานีรถไฟเชียงใหม่,1.0,0.735833,38.0,921.0,76.0,652.0
46718,0.041342,2cbc74c4fc474b5d9b58151ea5546b27,https://img.wongnai.com/p/1920x0/2016/12/29/2c...,1338636.0,ไส้อั่ว,260435.0,ไส้อั่ว อุ้ยเพชร,1.0,0.942844,153.0,1459.0,0.0,1996.0
46719,0.137491,5b403c49e20542209463a53a466b0464,https://img.wongnai.com/p/1920x0/2016/04/06/5b...,2798080.0,ข้าวหน้าเนื้อ,11728.0,Yoshinoya,1.0,0.737438,81.0,1024.0,81.0,696.0
46720,0.136769,5649ea59c6e34904b1f78fa0aa8b567e,https://img.wongnai.com/p/1920x0/2017/10/17/56...,8413072.0,ข้าวหน้าเนื้อ,236814.0,Oishi Ramen โออิชิราเมน,1.0,0.688656,153.0,1612.0,230.0,1535.0


In [15]:
#Write the processed table to food_cleaned_cropped.csv without the DataFrame index
output_csv = 'food_cleaned_cropped.csv'
food_cleaned.to_csv(output_csv, index=False)