## 1. 패키지 로드

In [1]:
import cv2
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tqdm import tqdm_notebook
import re
import dlib

## 2. 이미지 클래스 정의

In [2]:
class CImg:
    def __init__(self):
        '''init class'''
        pass
  
    def img_to_rgb(self, image):
        '''convert image to rgb'''
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        return image

    def img_to_bgr(self, image):
        '''convert image to bgr'''
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        return image

    def img_adjust(self, image, brightness = 0, contrast = 30):
        '''adjust image'''
        image = np.int16(image)
        image = image * (contrast / 127 + 1) - contrast + brightness
        image = np.clip(image, 0, 255)
        image = np.uint8(image)

        return image

    def img_resize(self, image, size = 256):
        '''resize image'''
        image = cv2.resize(image, (size, size))

        return image
  
    def img_face_align(self, image, size = 256, padding = 0.65):
        '''face align image'''
        detector = dlib.get_frontal_face_detector()
        sp = dlib.shape_predictor('../util/shape_predictor_5_face_landmarks.dat')
        dets = detector(image)
        if dets:
            pass
        else:
            #print("No detect face")
            return image
        s = sp(image, dets[0])
        image = dlib.get_face_chip(image, s, size=size, padding=padding)

        return image

    def img_compare(self, image_1, image_2, ratio=0.75, show=False):
        '''compare image'''
        # Initiate SIFT detector
        orb = cv2.ORB_create()

        # find the keypoints and descriptors with SIFT
        kp1, des1 = orb.detectAndCompute(image_1, None)
        kp2, des2 = orb.detectAndCompute(image_2, None)

        # create BFMatcher object
        bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

        # Match descriptors.
        matches = bf.match(des1,des2)

        # Sort them in the order of their distance.
        matches = sorted(matches, key = lambda x:x.distance)

        # BFMatcher with default params
        bf = cv2.BFMatcher()
        matches = bf.knnMatch(des1, des2, k=2)

        # Apply ratio test
        good = []
        for m,n in matches:
            if m.distance < ratio * n.distance:
                good.append([m])

        # Draw first 10 matches.
        knn_image = cv2.drawMatchesKnn(image_1, kp1, image_2, kp2, good, None, flags=2)
        if show:
            plt.imshow(knn_image)
            plt.show()

        return len(good)

    def img_show(self, image):
        '''show image'''
        plt.figure(figsize=(16, 10))
        plt.imshow(image)

    def run_image_compare(self, image_1, image_2, size = 256, padding = 0.65, ratio = 0.75, brightness = 0, contrast = 30, show=False):
        '''compare image'''
        image_1 = self.img_to_rgb(image_1)
        image_1 = self.img_adjust(image_1, brightness=brightness, contrast=contrast)
        image_1 = self.img_resize(image_1, size=size)
        image_1 = self.img_face_align(image_1, size=size, padding=padding)
        image_1 = self.img_to_bgr(image_1)
        image_1 = cv2.cvtColor(image_1, cv2.COLOR_BGR2GRAY)

        image_2 = self.img_to_rgb(image_2)
        image_2 = self.img_adjust(image_2, brightness=brightness, contrast=contrast)
        image_2 = self.img_resize(image_2, size=size)
        image_2 = self.img_face_align(image_2, size=size, padding=padding)
        image_2 = self.img_to_bgr(image_2)
        image_2 = cv2.cvtColor(image_2, cv2.COLOR_BGR2GRAY)

        n_similarity = self.img_compare(image_1, image_2, ratio=0.75, show=show)

        return n_similarity
    
    def run(self, actor, webtoon, actor_image_path, webtoon_image_path):
        '''calculate all actor images and webtoon images similarity '''
        actor_df = actor.copy()
        webtoon_df = webtoon.copy()
        
        # webtoon name list and actor name list
        webtoon_names = webtoon_df['Title'].unique()
        actor_name_list = actor_df['Name'].to_list()
        actor_male_name_list = actor_df[actor_df['Sex'] == '남'].Name.to_list()
        actor_female_name_list = actor_df[actor_df['Sex'] == '여'].Name.to_list()
        
        # result dataframe
        result_df = actor_df[['Name', 'Age', 'Sex']]
        
        for webtoon_name in tqdm_notebook(webtoon_names):
            try:
                # get characters's names from webtoon name
                charater_names = webtoon_df[webtoon_df['Title'] == webtoon_name]['Character'].to_list()

                # calculate actors's images and characters's images
                for charater_name in charater_names:
                    try:
                        # character's image load 
                        character_image = cv2.imread(f'{webtoon_image_path}/{webtoon_name}_{charater_name}.jpg', flags=cv2.IMREAD_COLOR)

                        # get character's sex, age
                        character_sex = webtoon_df[webtoon_df['Character'] == charater_name]['Sex'].values[0]
                        character_age = webtoon_df[webtoon_df['Character'] == charater_name]['Age'].values[0]
                        character_age = int(re.sub('[^0-9]', '', character_age))
                    
                        # calculation according to sex
                        if character_sex == '남':
                            for actor_name in actor_male_name_list:
                                actor_age = int(actor_df[actor_df['Name'] == actor_name]['Age'].values[0])
                                if (character_age < actor_age) and (character_age + 20 > actor_age):
                                    actor_image = cv2.imread(f'{actor_image_path}/webtoon_{actor_name}.jpg', flags=cv2.IMREAD_COLOR)
                                    n_sim = self.run_image_compare(actor_image, character_image)
                                    result_df.loc[(result_df['Name'] == actor_name), f'{webtoon_name}_{charater_name}_image_score'] = n_sim
                        elif character_sex == '여':
                            for actor_name in actor_female_name_list:
                                actor_age = int(actor_df[actor_df['Name'] == actor_name]['Age'].values[0])
                                if (character_age < actor_age) and (character_age + 20 > actor_age):
                                    actor_image = cv2.imread(f'{actor_image_path}/webtoon_{actor_name}.jpg', flags=cv2.IMREAD_COLOR)
                                    n_sim = self.run_image_compare(actor_image, character_image)
                                    result_df.loc[(result_df['Name'] == actor_name), f'{webtoon_name}_{charater_name}_image_score'] = n_sim
                        else: # if, sex == NaN
                            for actor_name in actor_name_list:
                                actor_age = int(actor_df[actor_df['Name'] == actor_name]['Age'].values[0])
                                if (character_age < actor_age) and (character_age + 20 > actor_age):
                                    actor_image = cv2.imread(f'{actor_image_path}/webtoon_{actor_name}.jpg', flags=cv2.IMREAD_COLOR)
                                    n_sim = self.run_image_compare(actor_image, character_image)
                                    result_df.loc[(result_df['Name'] == actor_name), f'{webtoon_name}_{charater_name}_image_score'] = n_sim
                    except:
                        print(f'{charater_name} Error')
            except Exception as e:
                print(e)
                print(f'{webtoon_name} Error')
                
        
        return result_df

## 3. csv file load

In [3]:
actor_data = pd.read_csv('../text_data/Actor.csv', encoding='utf-8-sig')
webtoon_data = pd.read_csv('../text_data/Webtoon.csv', encoding='utf-8-sig')

In [4]:
actor_data.head(2)

Unnamed: 0,Name,Age,Sex,수상내역,드라마활동,activation_content,사진url,배역내용,"제목, 내용"
0,감우성,52,남,"['2018 SBS 연기대상 대상', '2018 SBS 연기대상 베스트 커플상', ...",['바람이 분다/주연/권도훈 역/https://search.naver.com/sea...,"MBC 20기 공채 탤런트로 배우 생활을 시작했으며 동기로 한석규, 박철, 차광수,...",https://search.pstatic.net/common?type=b&size=...,['첫사랑 수진과 열렬한 연애 끝에 결혼까지 성공했다. 남에게 피해 주지 않고 올곧...,"감우성 닮으면 잘생긴거냐?,내 친구 미용실가서 누나가 감우성 닮았다는데 잘생긴거냐 ..."
1,강경준,39,남,"['2017 MBC 연기대상 연속극부문 남자 우수연기상', '2004 MBC 방송연...",['별별 며느리/주연/최한주 역/https://search.naver.com/sea...,"초등학교 시절에는 야구, 중학교 시절에는 농구선수였었다. 농구를 소재로 한 드라마 ...",https://search.pstatic.net/common?type=b&size=...,"['흙수저? 아니, 몸짱 얼짱 마음까지 짱인 태권도장 사범. 얼굴만 봐도 신뢰감이 ...","강경준 -> 강 산 -> 강승호네,이름 두번이나 바꿨구나\n그러고보니 남다 딱 장신..."


In [5]:
webtoon_data.head(2)

Unnamed: 0,Title,Character,Sex,Age,설명
0,폭풍의전학생,주인공,남,10대,최완서를 얼굴에 니킥을 작렬시켜 한방에 보내버리고 정해인을 박치기 한다음에 머리를 ...
1,폭풍의전학생,최완서,남,10대,전 3반짱. 묘한 색깔의 피부[7]와 부서진 콧잔등이 포인트. 전설의 레전드의 최대...


## 4. 이미지 유사도 계산

In [6]:
actor_image_path = '../image_data/Actor2Webtoon'
webtoon_image_path = '../image_data/Webtoon'

In [7]:
cImg = CImg()

In [8]:
result_df = cImg.run(actor_data, webtoon_data, actor_image_path, webtoon_image_path)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for webtoon_name in tqdm_notebook(webtoon_names):


  0%|          | 0/71 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


공기영 Error
벽창오 Error
dj.8(팔Eight) Error
독고연 Error
최개발 Error
밥 Error
도동남 Error
우사미 Error
백지미 Error
붕어머리남자 Error
예민해 Error
점쟁이아가씨 Error
전진수 Error
Dr.에니그마 박사 Error
오강남 Error
종건 Error
동동배 Error
두태양 Error
이중현 Error
유수민 Error
주수애 Error
국사 선생님 Error
교장선생님 Error
김두만 Error
차택기 Error
사채업자 Error
상이 신세진 노인 Error
천존 Error
지동대신 Error
홍명희 Error
정소진 Error
이름없는링커 Error
라이트닝의 사이드킥 Error
지나 엘리슨(블러드팩) Error
한준성 Error
정지수 Error
우슬기 Error


  self.obj[key] = infer_fill_value(value)


국회의원 정국천 Error
최우남 Error
베네로 플란넬(하기스의 아버지) Error
김석훈 Error
패션왕 Error
시각 장애인 때밀이 Error
여탕 팀장 Error
여 회장 Error
꼬마 Error
하일권 Error
오지란(스피커) Error
한세진 Error
이수빈 Error
윤현석 Error
장호재 Error
송예린 Error
라자크 케르티아 Error
담임 선생님 Error
이백합 Error
장노란 Error
이태양 Error
김유리 Error
박현진 Error
송재민 Error
이경민 Error
윤지수 Error
이양선 Error
나유진 Error
박하늘 Error
이백합의 아버지 Error
이백합의 어머니 Error
진재현 Error
박예진 Error
김달수 Error
양채모 Error
마적두 Error
장영식 Error
박봉녀 Error
이물식 Error
박화수 Error
정명구 Error
한해나 Error
진서원 Error
이보겸 Error
윤채아 Error
김송이 Error
최율 Error
김지원 Error
현재희 Error
지아 Error
한유나 Error
한해나의 엄마 Error
한성호 Error
최율의 엄마 Error
최율의 아빠 Error
최율의 할아버지 Error
진서원의 엄마 Error
한유나의 전남친 Error
강은환 Error
소년 Error
막순이 Error
초영이 Error
조아연 Error
손우진 Error
장근재 Error
홍승복 Error
안동민 Error
김자홍 Error
진기한 Error
유성연 Error
중대장 Error
소대장 Error
염라대왕 Error
문오증 Error
육경열 Error
박수환 Error
이동은 Error
홍도연 Error
오동석 Error
김희진 Error
변동출 Error
조만섭 Error
힙제이 Error
김비서 Error
편희정 Error
최민규 Error
심봉희 Error
서민기 Error
권기혁 Error
이광재 Error
노준석 Error
권설하 Error
윤설하 Err

## 5. 데이터 프레임 다듬기

In [10]:
result_df.to_csv('../text_data/feature_match_score_df.csv', encoding='utf-8-sig', index=False)

In [11]:
result_df.head()

Unnamed: 0,Name,Age,Sex,폭풍의전학생_주인공_image_score,폭풍의전학생_최완서_image_score,폭풍의전학생_김대용_image_score,폭풍의전학생_정해인_image_score,폭풍의전학생_황인규_image_score,폭풍의전학생_이연희_image_score,폭풍의전학생_조규식_image_score,...,노블레스_프랑켄슈타인_image_score,노블레스_레지스 K 란데그르_image_score,노블레스_세이라 J 로이아드_image_score,노블레스_라엘 케르티아_image_score,노블레스_카리어스 블러스터_image_score,노블레스_M-21_image_score,노블레스_타키오_image_score,노블레스_타오_image_score,전설의 레전드_전설_image_score,전설의 레전드_문태식_image_score
0,감우성,52,남,,,,,,,,...,,,,,4.0,,,,,
1,강경준,39,남,,,,,,,4.0,...,2.0,,,3.0,,1.0,1.0,4.0,,2.0
2,강기영,39,남,,,,,,,2.0,...,2.0,,,2.0,,4.0,0.0,1.0,,5.0
3,강남길,64,남,,,,,,,,...,,,,,,,,,,
4,강지섭,41,남,,,,,,,1.0,...,3.0,,,,6.0,,,,,
