In [1]:
import pandas as pd
import csv
import sys
import ast
import random
import numpy as np
from tqdm.notebook import tqdm
sys.path.append('../src/')
from functions import from_tsv_to_list
from functions import save_list_to_tsv_file
sys.path.append("../../")
from image_size import get_image_size  # source: https://github.com/scardine/image_size

In [2]:
# images
image_dir = '../../scraped_photos_final/'

# test
try:
    test_df = pd.read_csv('../model_output/test_model_output.csv', index_col=0)
    in_test = from_tsv_to_list('../../news-navigator/test-A/in.tsv')
except:
    print('(1) No such file or directory')
# val
try:
    val_df = pd.read_csv('../model_output/val_model_output.csv', index_col=0)
    in_val = from_tsv_to_list('../../news-navigator/dev-0/in.tsv')
except:
    print('(2) No such file or directory')
# train
try:
    train_df = pd.read_csv('../model_output/train_model_output.csv', index_col=0)
    in_train = from_tsv_to_list('../../news-navigator/train/in.tsv')
except:
    print('(3) No such file or directory')


In [3]:
def parse_model_outcome(model_outcome_df, in_file, image_directory):
    # getting images size before rescale
    img_old_sizes_list = []
    for i in range(len(in_file)):
        img_width, img_height = get_image_size.get_image_size(
            image_directory + in_file[i]
        )
        img_old_sizes_list.append([img_width, img_height])
    
    model_outcome_df['old_image_size'] = img_old_sizes_list

    scaler_width, scaler_height = [], []
    for i in range(len(model_outcome_df)):
        old_image_size_width = model_outcome_df['old_image_size'][i][0]
        old_image_size_height = model_outcome_df['old_image_size'][i][1]
        new_image_size_width = np.float(ast.literal_eval(model_outcome_df['new_image_size'][i])[0][0])
        new_image_size_height = np.float(ast.literal_eval(model_outcome_df['new_image_size'][i])[0][0])

        scaler_width.append(np.float(old_image_size_width)/np.float(new_image_size_width))
        scaler_height.append(np.float(old_image_size_height)/np.float(new_image_size_height))

    out_list = []
    for i in range(len(model_outcome_df)):
        pred_labels = ast.literal_eval(model_outcome_df['predicted_labels'][i])
        pred_boxes = ast.literal_eval(model_outcome_df['predicted_boxes'][i])
        out_str = ''
        for ii in range(len(pred_labels)):        
            if int(pred_labels[ii]) == 1:
                label = 'photograph'
            elif int(pred_labels[ii]) == 2:
                label = 'illustration'
            elif int(pred_labels[ii]) == 3:
                label = 'map'
            elif int(pred_labels[ii]) == 4:
                label = 'cartoon'
            elif int(pred_labels[ii]) == 5:
                label = 'editorial_cartoon'
            elif int(pred_labels[ii]) == 6:
                label = 'headline'
            elif int(pred_labels[ii]) == 7:
                label = 'advertisement'
            x0 = str(int(round(pred_boxes[ii][0],0)*scaler_width[i]))
            y0 = str(int(round(pred_boxes[ii][1],0)*scaler_height[i]))
            x1 = str(int(round(pred_boxes[ii][2],0)*scaler_width[i]))
            y1 = str(int(round(pred_boxes[ii][3],0)*scaler_height[i]))

            out_str = out_str + f'{label}:{x0},{y0},{x1},{y1} '
        
        out_str = out_str.strip(" ")
        out_list.append(out_str)

    return out_list



In [6]:
parse_model_outcome(test_df, in_test, image_dir)

['headline:506,279,1240,877 headline:2609,435,3979,884 headline:1229,279,1919,1013 headline:1908,292,2604,898 headline:496,5518,1202,5845 headline:1887,3892,2582,4198 headline:3984,421,4642,694 advertisement:4647,5205,5348,6594 illustration:3316,925,3952,1674 headline:3957,3565,5327,3885 headline:2464,265,5332,421 illustration:4653,394,5327,2762 photograph:4647,394,5338,2796 advertisement:4664,462,5321,2830 cartoon:1930,5688,2561,6560 headline:4669,428,5311,707 headline:954,319,3531,898 advertisement:3968,3545,5359,3892',
 'advertisement:1576,5767,2934,6533 advertisement:2914,3124,4981,6579 headline:2209,323,2903,726 headline:866,2873,2234,3296 headline:182,4392,881,4835 headline:2898,317,3588,607 headline:146,317,846,733 headline:167,2378,861,2820 headline:182,5674,881,5971 advertisement:2934,1037,4926,3058 headline:2240,2827,2919,3038 headline:4287,317,4885,449 photograph:851,435,2234,2807 illustration:831,363,2214,2781 advertisement:2929,3118,4971,4617',
 'headline:4605,395,6396,900

In [4]:
# test
try:
    out_list_test = parse_model_outcome(test_df, in_test, image_dir)
    save_list_to_tsv_file('../../news-navigator/test-A/out.tsv', out_list_test)
except:
    print('(1) No such file or directory')
# val
try:
    out_list_val = parse_model_outcome(val_df, in_val, image_dir)
    save_list_to_tsv_file('../../news-navigator/dev-0/out.tsv', out_list_val)
except:
    print('(2) No such file or directory')
# train
try:
    out_list_train = parse_model_outcome(train_df, in_train, image_dir)
    save_list_to_tsv_file('../../news-navigator/train/out.tsv', out_list_train)
except:
    print('(3) No such file or directory')