In [11]:
import tqdm

def nop(it, *a, **k):
    return it

real_tqdm = tqdm.tqdm
tqdm.tqdm = nop

import scipy
scipy.sparse.csr.csr_matrix = scipy.sparse.csr_matrix

import time
import os
import glob
import pickle
from typing import Union, List
import copy

import numpy as np
np.bool = np.bool_
import cv2
import torch
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import torch.nn.functional as F

from utils.inference.image_processing import crop_face, get_final_image, show_images, normalize_and_torch, normalize_and_torch_batch
from utils.inference.video_processing import read_video, get_target, get_final_video, add_audio_from_another_video, face_enhancement, crop_frames_and_get_transforms, resize_frames
from utils.inference.core import model_inference, transform_target_to_torch
from utils.inference.faceshifter_run import faceshifter_batch, faceshifter_batch_zattrs
from network.AEI_Net import AEI_Net
from coordinate_reg.image_infer import Handler
from insightface_func.face_detect_crop_multi import Face_detect_crop
from arcface_model.iresnet import iresnet100
from models.pix2pix_model import Pix2PixModel
from models.config_sr import TestOptions

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
torch.backends.cudnn.benchmark = True

  mirr = onp.mirr
  npv = onp.npv
  pmt = onp.pmt
  ppmt = onp.ppmt
  pv = onp.pv
  rate = onp.rate
  cur_np_ver = LooseVersion(_np.__version__)
  np_1_17_ver = LooseVersion('1.17')
  cur_np_ver = LooseVersion(_np.__version__)
  np_1_15_ver = LooseVersion('1.15')


In [2]:
class SinglePCA:
    def __init__(self, layer_id, root_path="./pca_pkl", use_norm=False):
        self.layer_id = layer_id
        self.use_norm = use_norm # -1~1
        with open(f"{root_path}/Altered_zattr{self.layer_id}PCA.pkl", "rb") as file:
            self.pca = pickle.load(file)
        with open(f"{root_path}/Altered_zattr{self.layer_id}PCAMinMax.pkl", "rb") as file:
            minmax = pickle.load(file)
        self.pca_min = minmax["min"]
        self.pca_max = minmax["max"]
    
    def transform(self, z_embed:np.ndarray) -> np.ndarray:
        return self.pca.transform(z_embed)
    
    def inverse_transform(self, p_emb:np.ndarray) -> np.ndarray:
        return self.pca.inverse_transform(p_emb)

    def calcul_z_embed_diff(self, target:np.ndarray, original:np.ndarray)->np.ndarray:
        target = self.inverse_transform(target)
        original = self.inverse_transform(original)
        return target-original
    
    def inject_pca_param(self, orig_z_embeds: List[np.ndarray], pca_param:dict, add=False)->List[np.ndarray]:
        p_emb = self.transform(orig_z_embeds[self.layer_id])
        orig_p_emb = p_emb.copy()
        if self.use_norm:
            for k in pca_param:
                if add:
                    p_emb[:, k] += pca_param[k] * (self.pca_max[k]-self.pca_min[k])/2
                else:
                    p_emb[:, k] = self.pca_min[k] + (pca_param[k]+1)/2*(self.pca_max[k]-self.pca_min[k])
        else:
            for k in pca_param:
                p_emb[:, k] = pca_param[k]+p_emb[:, k]*int(add)
        res_z_embeds = copy.deepcopy(orig_z_embeds)
        res_z_embeds[self.layer_id] += self.calcul_z_embed_diff(p_emb, orig_p_emb)
        return res_z_embeds

class DoublePCA:
    def __init__(self, layer_num=8, root_path="./pca_pkl", use_norm=False):
        self.layer_num = layer_num
        self.use_norm = use_norm # -1~1

        self.pca1_list = []
        for z_i in range(self.layer_num):
            with open(f"{root_path}/Altered_zattr{z_i}PCA.pkl", "rb") as file:
                self.pca1_list.append(pickle.load(file))
        with open(f"{root_path}/Altered_zattr_doublePCA.pkl", "rb") as file:
            self.pca2 = pickle.load(file)
        with open(f"{root_path}/Altered_zattr_doublePCAMinMax.pkl", "rb") as file:
            minmax = pickle.load(file)
        self.pca2_min = minmax["min"]
        self.pca2_max = minmax["max"]
    def transform(self, z_embeds) -> np.ndarray:
        p1emb_array = [self.pca1_list[z_i].transform(z_embeds[z_i]) for z_i in range(self.layer_num)]
        p1emb_array = np.concatenate(p1emb_array, axis=1)
        return self.pca2.transform(p1emb_array)

    def inverse_transform(self, p2emb_array)->List[np.ndarray]:
        p1emb_array = self.pca2.inverse_transform(p2emb_array).reshape([-1, self.layer_num, 128])
        z_embeds = [self.pca1_list[z_i].inverse_transform(p1emb_array[:,z_i]) for z_i in range(self.layer_num)]
        return z_embeds

    def calcul_z_embed_diff(self, target:np.ndarray, original:np.ndarray)->List[np.ndarray]:
        assert target.shape[0] == original.shape[0]
        target = self.inverse_transform(target)
        original = self.inverse_transform(original)
        return [target[i]-original[i] for i in range(self.layer_num)]

    def inject_pca_param(self, orig_z_embeds: List[np.ndarray], pca_param:dict, add=False)->List[np.ndarray]:
        p2emb = self.transform(orig_z_embeds)
        orig_p2embed = p2emb.copy()
        if self.use_norm:
            for k in pca_param:
                if add:
                    p2emb[:, k] += pca_param[k] * (self.pca_max[k]-self.pca_min[k])/2
                else:
                    p2emb[:, k] = self.pca_min[k] + (pca_param[k]+1)/2*(self.pca_max[k]-self.pca_min[k])
        else:
            for k in pca_param:
                p2emb[:, k] = pca_param[k]+p2emb[:, k]*int(add)
        diff = self.calcul_z_embed_diff(p2emb, orig_p2embed)
        return [orig_z_embeds[i]+diff[i] for i in range(len(orig_z_embeds))]





class FaceSwap_PCAInjection():

    def __init__(self, pca_mode="double",pca_use_norm=False):
        self.crop_size = 224 

        self.app = Face_detect_crop(name='antelope', root='./insightface_func/models')
        self.app.prepare(ctx_id= 0, det_thresh=0.6, det_size=(640,640))

        # main model for generation
        self.G = AEI_Net(backbone='unet', num_blocks=2, c_id=512)
        self.G.eval()
        self.G.load_state_dict(torch.load('weights/G_unet_2blocks.pth', map_location=torch.device('cpu')))
        self.G = self.G.cuda()
        self.G = self.G.half()

        # arcface model to get face embedding
        self.netArc = iresnet100(fp16=False)
        self.netArc.load_state_dict(torch.load('arcface_model/backbone.pth'))
        self.netArc = self.netArc.cuda()
        self.netArc.eval()

        # model to get face landmarks
        self.handler = Handler('./coordinate_reg/model/2d106det', 0, ctx_id=0, det_size=640)

        # model to make superres of face, set use_sr=True if you want to use super resolution or use_sr=False if you don't
        opt = TestOptions()
        #opt.which_epoch ='10_7'
        self.model = Pix2PixModel(opt)
        self.model.netG.train()
        if pca_mode == "double":
            self.pca = DoublePCA(use_norm=pca_use_norm)
        else:
            try:
                pca_mode = int(pca_mode)
                assert pca_mode >= 0 and pca_mode <= 7
                self.pca = SinglePCA(pca_mode, use_norm=pca_use_norm)
            except:
                raise ValueError("pca_mode should be 'double' or integer in [0~7]")

        
    
    def swap_face(self, source: Union[np.ndarray, str],
                    target: Union[np.ndarray, str],
                    is_tgt_video=False,
                    BS = 60,
                    pca_param: dict = None,
                    pca_add: bool = False):
        """ # TODO

        source와 target은 cv2.imread의 출력, 즉 bgr ndarray 입력으로 간주한다.
        단, target이 영상인 경우에는 [t, H, W, C]꼴의 ndarray 리스트 입력으로 생각한다

        """
        
        if isinstance(source, str):
            source = cv2.imread(source)
        source = [crop_face(source, self.app, self.crop_size)[0][:,:,::-1]]

        if isinstance(target, str):
            if is_tgt_video:
                full_frames, fps = read_video(tgt)
            else:
                target_full = cv2.imread(target)
                full_frames = [target_full]
        else:
            full_frames = target if is_tgt_video else [target]

        cropped_target = get_target(full_frames, self.app, self.crop_size)
        target_norm = normalize_and_torch_batch(np.array(cropped_target))
        target_embeds = self.netArc(F.interpolate(target_norm, scale_factor=0.5, mode='bilinear', align_corners=True))
        crop_frames_list, tfm_array_list = crop_frames_and_get_transforms(full_frames,
                                                                    target_embeds,
                                                                    self.app,
                                                                    self.netArc,
                                                                    self.crop_size,
                                                                    set_target=False,
                                                                    similarity_th=0.15
                                                                    )
        crop_frames_list = crop_frames_list
        tfm_array_list = tfm_array_list

        source_embeds = []
        for source_curr in source:
            source_curr = normalize_and_torch(source_curr)
            source_embeds.append(self.netArc(F.interpolate(source_curr, scale_factor=0.5, mode='bilinear', align_corners=True)))

        final_frames_list = []
        for idx, (crop_frames, tfm_array, source_embed) in enumerate(zip(crop_frames_list, tfm_array_list, source_embeds)):
            # Resize croped frames and get vector which shows on which frames there were faces
            resized_frs, present = resize_frames(crop_frames)
            resized_frs = np.array(resized_frs)

            # transform embeds of Xs and target frames to use by model
            target_batch_rs = transform_target_to_torch(resized_frs, half=True)
            #assert False
            source_embed = source_embed.half()

            # run model
            size = target_batch_rs.shape[0]
            model_output = []
            for i in range(0, size, BS):
                zattrs = self.G.get_attr(target_batch_rs[i:i+BS])
                if pca_param:
                    orig_z_shape = [zattrs[j].shape for j in range(len(zattrs))]
                    np_zattrs = [zattrs[j].detach().cpu().numpy().reshape([orig_z_shape[j][0], -1]) for j in range(len(zattrs))]
                    injected_z = self.pca.inject_pca_param(np_zattrs, pca_param, add=pca_add)
                    zattrs = [torch.from_numpy(injected_z[j].reshape(*orig_z_shape[j])).half().cuda() for j in range(len(orig_z_shape))]
                Y_st = faceshifter_batch_zattrs(source_embed, zattrs, BS, self.G)
                model_output.append(Y_st)
            torch.cuda.empty_cache()
            model_output = np.concatenate(model_output)

            # create list of final frames with transformed faces
            final_frames = []
            idx_fs = 0
            for pres in present:
                if pres == 1:
                    final_frames.append(model_output[idx_fs])
                    idx_fs += 1
                else:
                    final_frames.append([])
            final_frames_list.append(final_frames)

        final_frames_list = face_enhancement(final_frames_list, self.model)
        
        if is_tgt_video:
            assert False, "not implemented"
        else:
            result = get_final_image(final_frames_list, crop_frames_list, full_frames[0], tfm_array_list, self.handler)
        return result



In [11]:
import ipywidgets as widgets

In [4]:
faceswap = FaceSwap_PCAInjection(pca_mode=5,pca_use_norm=True)

source_upload = widgets.FileUpload(
    description="Source Upload",
    aceept="",
    multiple=False
)
target_upload = widgets.FileUpload(
    description="Target Upload",
    aceept="",
    multiple=False
)
smile_button = widgets.ToggleButton(
    value=False,
    description="Make you Smile ;)",
    disabled=False,
    icon=""
)

line1_multibox = widgets.HBox(
    children=[source_upload,
    target_upload,
    smile_button
    ]
)

def plotting(source_value, target_value, smile: bool):
    if bool(source_value) and bool(target_value):
        source = cv2.imdecode(np.asarray(source_value[0]["content"], dtype=np.uint8), cv2.IMREAD_COLOR)
        target = cv2.imdecode(np.asarray(target_value[0]["content"], dtype=np.uint8), cv2.IMREAD_COLOR)
        plt.figure(num=1, clear=True, figsize=[12+int(smile)*4, 4])
        plt.subplot(1, 3+int(smile), 1)
        plt.imshow(source[:, :, ::-1])
        plt.axis("off")
        plt.subplot(1, 3+int(smile), 2)
        plt.imshow(target[:, :, ::-1])
        plt.axis("off")
        result = faceswap.swap_face(
            source=source,
            target=target)
        plt.subplot(1, 3+int(smile), 3)
        plt.imshow(result[:, :, ::-1])
        plt.axis("off")
        if smile:
            plt.subplot(1, 3+int(smile), 4)
            result = faceswap.swap_face(
            source=source,
            target=target,
            pca_param={8:1.5})
            plt.imshow(result[:, :, ::-1])
            plt.axis("off")
        plt.tight_layout()
        plt.show()
        


iplot = widgets.interactive_output(plotting,
                                   {'source_value':source_upload,
                                    'target_value':target_upload,
                                    'smile':smile_button
                                    })

display(line1_multibox, iplot)

input mean and std: 127.5 127.5
find model: ./insightface_func/models/antelope/glintr100.onnx recognition
find model: ./insightface_func/models/antelope/scrfd_10g_bnkps.onnx detection
set det-size: (640, 640)
loading ./coordinate_reg/model/2d106det 0
input mean and std: 127.5 127.5
find model: ./insightface_func/models/antelope/glintr100.onnx recognition
find model: ./insightface_func/models/antelope/scrfd_10g_bnkps.onnx detection
set det-size: (640, 640)


[05:36:22] ../src/nnvm/legacy_json_util.cc:208: Loading symbol saved by previous version v1.5.0. Attempting to upgrade...
[05:36:22] ../src/nnvm/legacy_json_util.cc:216: Symbol successfully upgraded!


Network [LIPSPADEGenerator] was created. Total number of parameters: 72.2 million. To see the architecture, do print(network).
Load checkpoint from path:  weights/10_net_G.pth


object.__init__() takes exactly one argument (the instance to initialize)
This is deprecated in traitlets 4.2.This error will be raised in a future release of traitlets.
  super().__init__(**kwargs)
object.__init__() takes exactly one argument (the instance to initialize)
This is deprecated in traitlets 4.2.This error will be raised in a future release of traitlets.
  super().__init__(**kwargs)
  from ipykernel.pylab.backend_inline import flush_figures


HBox(children=(FileUpload(value=(), description='Source Upload'), FileUpload(value=(), description='Target Upl…

Output()

# 영상 받을려고 만지작거리는 코드
# 미완성

In [12]:
source_upload = widgets.FileUpload(
    description="Source Upload",
    aceept="",
    multiple=False
)
target_upload = widgets.FileUpload(
    description="Target Upload",
    aceept="",
    multiple=False
)

line1_multibox = widgets.HBox(
    children=[source_upload,
    target_upload,
    ]
)
display(line1_multibox)

HBox(children=(FileUpload(value=(), description='Source Upload'), FileUpload(value=(), description='Target Upl…

In [26]:
print(source_upload.value)
source_upload.value[0].type


video_stream = cv2.imdecode(np.frombuffer(source_upload.value[0]["content"], dtype=np.uint8), cv2.IMREAD_UNCHANGED)

# 메모리 버퍼로부터 동영상을 읽어들이기 위해 VideoCapture 객체 생성
video_capture = cv2.VideoCapture(video_stream)

while video_capture.isOpened():
    ret, frame = video_capture.read()
    if not ret:
        break

    # 프레임 처리 (여기서 필요한 작업을 수행)
    cv2.imshow('Frame', frame)

    # 'q' 키를 누르면 루프 종료
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# 자원 해제
video_capture.release()

({'name': 'what_is_love_kekw.mp4', 'type': 'video/mp4', 'size': 231549, 'content': <memory at 0x7fad02c98940>, 'last_modified': datetime.datetime(2024, 4, 12, 8, 16, 7, 476000, tzinfo=datetime.timezone.utc)},)


[ERROR:0@493.528] global cap.cpp:164 open VIDEOIO(CV_IMAGES): raised OpenCV exception:

OpenCV(4.9.0) /io/opencv/modules/videoio/src/cap_images.cpp:300: error: (-215:Assertion failed) !_filename.empty() in function 'open'




In [13]:
with open("temp_video.mp4", "wb") as f:
    f.write(source_upload.value[0]["content"].tobytes())

In [38]:
source_upload.value[0]

{'name': 'jaeseung_2_heheboi.mp4',
 'type': 'video/mp4',
 'size': 511907,
 'content': <memory at 0x7face36bb880>,
 'last_modified': datetime.datetime(2024, 4, 11, 12, 24, 44, 792000, tzinfo=datetime.timezone.utc)}

In [None]:
%conda install panel watchfiles jupyter_bokeh -y

In [1]:
import panel as pn
pn.extension()
file_input = pn.widgets.FileInput()
display(file_input)

BokehModel(combine_events=True, render_bundle={'docs_json': {'d082c10f-0e6a-4136-a55b-3c89d78af469': {'version…

In [9]:
type(file_input.value)
if file_input.value is not None:
    file_input.save('test.mp4')

In [35]:
np.frombuffer(source_upload.value[0]["content"], dtype=np.uint8).shape

(511907,)

In [None]:
video_capture = cv2.VideoCapture()
video_capture.get

In [25]:
print(video)

None


# 구 사용 예제 코드

In [None]:
faceswap = FaceSwap_PCAInjection(pca_mode=5,pca_use_norm=True)

input mean and std: 127.5 127.5
find model: ./insightface_func/models/antelope/glintr100.onnx recognition
find model: ./insightface_func/models/antelope/scrfd_10g_bnkps.onnx detection
set det-size: (640, 640)
loading ./coordinate_reg/model/2d106det 0
input mean and std: 127.5 127.5
find model: ./insightface_func/models/antelope/glintr100.onnx recognition
find model: ./insightface_func/models/antelope/scrfd_10g_bnkps.onnx detection
set det-size: (640, 640)


[10:06:48] ../src/nnvm/legacy_json_util.cc:208: Loading symbol saved by previous version v1.5.0. Attempting to upgrade...
[10:06:48] ../src/nnvm/legacy_json_util.cc:216: Symbol successfully upgraded!


Network [LIPSPADEGenerator] was created. Total number of parameters: 72.2 million. To see the architecture, do print(network).
Load checkpoint from path:  weights/10_net_G.pth


In [None]:
print("no injection")
for target in [
    "examples/images/jaeseung_3.jpg",
    "examples/images/great-faker.webp",
    "examples/images/tgt2.png",
    ]:

    result = faceswap.swap_face(
        source="examples/images/elon_musk.jpg",
        target=target)

    plt.imshow(result[:, :, ::-1])
    plt.show()
print("="*30)
print("="*30)
print("After inject")
for target in [
    "examples/images/jaeseung_3.jpg",
    "examples/images/great-faker.webp",
    "examples/images/tgt2.png",
    ]:

    result = faceswap.swap_face(
        source="examples/images/elon_musk.jpg",
        target=target,
        pca_param={
            8: 1.5},
        pca_add=True)

    plt.imshow(result[:, :, ::-1])
    plt.show()

print("="*30)
print("="*30)
print("After inject")
for target in [
    "examples/images/jaeseung_3.jpg",
    "examples/images/great-faker.webp",
    "examples/images/tgt2.png",
    ]:

    result = faceswap.swap_face(
        source="examples/images/elon_musk.jpg",
        target=target,
        pca_param={
            8: 1.5},
        pca_add=False)

    plt.imshow(result[:, :, ::-1])
    plt.show()

no injection


NameError: name 'faceswap' is not defined

In [16]:
source_upload.value["conetent"]

TypeError: tuple indices must be integers or slices, not str