In [1]:
import glob
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
def get_csv_list(dir_path):
    """Return the paths of all ``*.csv`` files directly inside ``dir_path``.

    Args:
        dir_path: Directory to search. A trailing path separator is optional.

    Returns:
        list[str]: Matching paths in sorted order, so that callers which index
        into the list (e.g. ``csv_path_list[0]``) behave the same on every
        run; ``glob.glob`` itself does not guarantee any ordering.
    """
    # os.path.join only inserts a separator when one is missing. Plain string
    # concatenation turned './data' into the pattern './data*.csv', which
    # matches sibling files instead of the directory's contents.
    pattern = os.path.join(dir_path, '*.csv')

    return sorted(glob.glob(pattern))

In [3]:
def create_merged_dataframe(csv_path_list):
    """Sum per-file scores and evaluation counts across several CSV files.

    Every CSV must provide the columns ``filename``, ``score`` and
    ``num_of_evaluations``. Rows are matched on ``filename`` (not on row
    position), so the CSVs may list files in any order and may cover
    different subsets of files.

    Args:
        csv_path_list: Non-empty sequence of CSV paths.

    Returns:
        pandas.DataFrame: One row per distinct filename, sorted by filename,
        with columns ``filename``, ``score`` and ``num_of_evaluations``
        holding the totals over all CSVs.

    Raises:
        ValueError: If ``csv_path_list`` is empty.
        KeyError: If a CSV lacks one of the required columns.
    """
    if not csv_path_list:
        raise ValueError('csv_path_list is empty: no CSV files to merge')

    value_columns = ['score', 'num_of_evaluations']
    frames = [
        pd.read_csv(csv_path)[['filename'] + value_columns]
        for csv_path in csv_path_list
    ]

    # Aggregate by key. Sorting each frame and then adding the Series does not
    # work: sort_values keeps the original index and Series addition aligns on
    # that index, so values were summed by original row number instead of by
    # filename.
    df_all = (
        pd.concat(frames, ignore_index=True)
        .groupby('filename', as_index=False, sort=True)[value_columns]
        .sum()
    )

    return df_all

In [30]:
def get_file_score_list_from_all_data(df_all, num_classes=5):
    """Turn each file's mean score into a one-hot class vector.

    The mean score ``score / num_of_evaluations`` is expected to lie in
    ``[0, 1]`` and is split into ``num_classes`` equal-width bins. A mean of
    exactly 1.0 is placed in the last bin.

    Args:
        df_all: DataFrame with columns ``filename``, ``score`` and
            ``num_of_evaluations``.
        num_classes: Number of bins / length of each one-hot vector.
            Defaults to 5 (bin width 0.2).

    Returns:
        tuple: ``(file_list, score_list)`` where ``file_list`` is the
        ``filename`` column and ``score_list`` is a float32 array of shape
        ``(len(df_all), num_classes)`` with a single 1.0 per row.

    Raises:
        ValueError: If any row has a non-positive or missing evaluation
            count, or a score outside ``[0, num_of_evaluations]``.
    """
    file_list = df_all['filename']
    scores = df_all['score'].to_numpy(dtype=np.float64)
    counts = df_all['num_of_evaluations'].to_numpy(dtype=np.float64)

    # Written as "not all(valid)" so that NaN entries are rejected as well.
    if not np.all(counts > 0):
        raise ValueError('num_of_evaluations must be positive for every row')
    if not np.all((scores >= 0) & (scores <= counts)):
        raise ValueError('score must lie within [0, num_of_evaluations] for every row')

    # floor(score * k / count) is exact for integer-valued scores and counts.
    # The previous "(score / count) // 0.2" is not, because 0.2 has no exact
    # binary representation: e.g. 0.6 // 0.2 evaluates to 2.0, not 3.0.
    class_idx = np.floor_divide(scores * num_classes, counts).astype(np.intp)
    # A perfect mean of 1.0 yields index num_classes; fold it into the last bin.
    class_idx = np.minimum(class_idx, num_classes - 1)

    score_list = np.zeros((len(class_idx), num_classes), dtype=np.float32)
    score_list[np.arange(len(class_idx)), class_idx] = 1.0

    return file_list, score_list

In [5]:
def save_file_score_list(file_list, score_list, dir_path='./pickle'):
    """Pickle the file list and the score list into ``dir_path``.

    Writes ``file_list.pickle`` and ``score_list.pickle``; existing files
    with those names are overwritten.

    Args:
        file_list: Object to store in ``file_list.pickle``.
        score_list: Object to store in ``score_list.pickle``.
        dir_path: Output directory. Created (including parents) if it does
            not exist. Defaults to ``'./pickle'``.

    Raises:
        OSError: If the directory cannot be created or a file cannot be
            written.
    """
    # Create the target directory up front so that a fresh checkout does not
    # fail with FileNotFoundError on the first open().
    os.makedirs(dir_path, exist_ok=True)

    with open(os.path.join(dir_path, 'file_list.pickle'), 'wb') as f:
        pickle.dump(file_list, f)
    with open(os.path.join(dir_path, 'score_list.pickle'), 'wb') as f:
        pickle.dump(score_list, f)

In [6]:
def load_file_score_list(dir_path='./pickle'):
    """Load the pickled file list and score list from ``dir_path``.

    Reads ``file_list.pickle`` and ``score_list.pickle``.

    Args:
        dir_path: Directory containing the two pickle files. Defaults to
            ``'./pickle'``.

    Returns:
        tuple: ``(file_list, score_list)`` as they were pickled.

    Raises:
        OSError: If either file is missing or unreadable.
    """
    # NOTE: pickle.load can execute arbitrary code embedded in the file; only
    # load pickles that this project produced itself.
    with open(os.path.join(dir_path, 'file_list.pickle'), 'rb') as f:
        file_list = pickle.load(f)
    with open(os.path.join(dir_path, 'score_list.pickle'), 'rb') as f:
        # Previously bound to the misspelled name `scorelist`, so the return
        # statement raised NameError (or picked up a stale global).
        score_list = pickle.load(f)

    return file_list, score_list

In [34]:
def main():
    """Run the full pipeline: collect CSVs, merge them, bin scores, pickle the result."""
    data_dir = './data/all_apart_data/'

    # Merge every CSV found in the data directory into a single frame.
    merged = create_merged_dataframe(get_csv_list(data_dir))

    # Derive (file_list, score_list) from the merged frame and persist both.
    save_file_score_list(*get_file_score_list_from_all_data(merged))

In [35]:
# Run the pipeline only when this code is executed directly, not when imported.
if __name__ == '__main__':
    main()

In [41]:
# Preview the first rows of the merged score table (at most 20).
# NOTE(review): in the visible cells `df_all` is only assigned as a local
# inside main(), so this cell relies on a kernel-global `df_all` created
# elsewhere and would fail after Restart & Run All. The displayed columns
# (`Unnamed: 0`, `mean_score`) are also not produced by
# create_merged_dataframe. TODO: confirm where this frame comes from.
df_all.iloc[:20]


Unnamed: 0,filename,score,num_of_evaluations,mean_score
0,BJzcrQFjfsQ.jpg,18,20,0.9
1,BlSfwPHB5a_.jpg,16,20,0.8
2,BlsoWzlFesk.jpg,16,20,0.8
3,BmNTJPdHFLI.jpg,8,20,0.4
4,Bn-l8xXA1oe.jpg,3,20,0.15
5,Bn-mgscAGfH.jpg,7,20,0.35
6,Bo1lDTCBL9j.jpg,14,20,0.7
7,BoMUTAphCAL.jpg,17,20,0.85
8,BoqI5d9h1pi.jpg,7,20,0.35
9,BpEQqVMgzRJ.jpg,15,20,0.75
