In [None]:
!apt-get install -y ffmpeg

import locale

# Make ``locale.getpreferredencoding`` always report UTF-8.
# NOTE(review): presumably a workaround for a runtime whose locale is not
# UTF-8, so that the ``!`` shell commands in this notebook run - confirm.
# The replacement keeps the optional ``do_setlocale`` parameter of the real
# function, so callers that invoke ``locale.getpreferredencoding(False)``
# keep working instead of raising TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: 'UTF-8'
!pip install gradio

In [None]:
import gradio as gr
from PIL import Image
from ultralytics import YOLO
import os
import subprocess
import ffmpeg
from TTS.api import TTS

# Load the pre-trained YOLO gender detection model.
# The weights path is relative to the working directory at the time this cell
# runs; point it at your own trained model file.
gender_model_path = './GenderModel_YOLOv10.pt' # YOUR TRAINED MODEL
gender_model = YOLO(gender_model_path)

# Directories that hold the target videos offered to the user, one per gender.
# get_videos_by_gender() lists the ".mp4" files found in these directories.
male_videos = "/content/zaka/test-male"
female_videos = "/content/zaka/test-female"
# Path where the SimSwap video will be saved.
# NOTE: run_simswap() defines its own local variable with the same name and
# value, so this module-level name is not read by the functions in this file.
output_video_path = "/content/zaka/SimSwap/output/output_video.mp4"  # Path where SimSwap video will be saved

# Actor face mapping: full path of each target video -> image of the actor's
# face in that video. run_simswap() looks the selected video up here and passes
# the image to SimSwap as --pic_specific_path, so every key must equal
# os.path.join(<gender directory>, <file name>) exactly.
video_to_actor_map = {
    "/content/zaka/test-female/MaguyBouGhoson.mp4": "/content/zaka/test-female/Maguyy.png",
    "/content/zaka/test-female/Iman.mp4": "/content/zaka/test-female/ImanSayed.png",
    "/content/zaka/test-female/SamiAlJazaeri.mp4": "/content/zaka/test-female/samia.png",
    "/content/zaka/test-female/Shokran.mp4": "/content/zaka/test-female/Torfa.png",
    "/content/zaka/test-male/KousayKhawli.mp4": "/content/zaka/test-male/Kousay.png",
    "/content/zaka/test-male/Joude.mp4": "/content/zaka/test-male/Day3aDay3a.png",
    "/content/zaka/test-male/Moetassem.mp4": "/content/zaka/test-male/Moetassem.png",
    "/content/zaka/test-male/Taym AL Hassan.mp4": "/content/zaka/test-male/TaymAlHassan.png"}

# Function to get the list of videos based on gender
def get_videos_by_gender(gender):
    """Return the ".mp4" file names stored for the given gender.

    Args:
        gender: 'male' or 'female'. Any other value yields an empty list.

    Returns:
        A list of file names (not full paths) ending in ".mp4", in the order
        ``os.listdir`` reports them for the matching directory.
    """
    # Pick the directory first; unknown genders never touch the filesystem.
    if gender == 'male':
        folder = male_videos
    elif gender == 'female':
        folder = female_videos
    else:
        return []

    mp4_names = []
    for entry in os.listdir(folder):
        if entry.endswith(".mp4"):
            mp4_names.append(entry)
    return mp4_names

# Gender Detection and Video Listing Function
def detect_gender_and_show_videos(image):
    """Detect the gender shown in an uploaded image and list matching videos.

    Args:
        image: File path of the uploaded image, or None when no image is set
            (Gradio passes None, for example, after the input is cleared).

    Returns:
        A tuple ``(status_message, dropdown_update)``: a text describing the
        outcome and a ``gr.update`` carrying the video choices for the
        dropdown (empty when nothing was detected or no videos exist).
    """
    # Without this guard Image.open(None) raises and the UI shows a raw error.
    if image is None:
        return "No image provided. Please upload an image.", gr.update(choices=[])

    confidence_threshold = 0.5  # Confidence threshold for predictions
    detected_gender = None
    best_confidence = 0.0

    # Use a context manager so the image file handle is released after inference.
    with Image.open(image) as img:
        results = gender_model.predict(img, verbose=False)  # Predict using YOLO model

    # Keep the most confident detection above the threshold, so the outcome
    # does not depend on the order in which boxes are reported.
    for result in results:
        for box in result.boxes:
            confidence = box.conf.item()
            if confidence < confidence_threshold:
                continue
            if detected_gender is None or confidence > best_confidence:
                best_confidence = confidence
                # Class id 1 is treated as female; every other id as male.
                detected_gender = 'female' if int(box.cls) == 1 else 'male'

    if detected_gender is None:
        return "Gender not detected. Try again with a clearer image.", gr.update(choices=[])

    # Get list of videos based on detected gender
    videos = get_videos_by_gender(detected_gender)

    if videos:
        return f"Detected gender: {detected_gender}", gr.update(choices=videos, value=videos[0])
    return f"Detected gender: {detected_gender}. No videos available.", gr.update(choices=[])

# Function to run SimSwap based on selected video
def run_simswap(image, video):
    """Run SimSwap to put the user's face onto the actor in the chosen video.

    Args:
        image: File path of the user's face image.
        video: File name (not full path) of the selected target video, as
            offered by the video dropdown.

    Returns:
        The path of the generated video.

    Raises:
        gr.Error: If an input is missing, the video has no mapped actor face,
            or the SimSwap process cannot be started or exits with an error.
            Raising lets Gradio display the message instead of trying to load
            an error string as a video file.
    """
    if not image or not video:
        raise gr.Error("Please upload an image and select a video before running SimSwap.")

    # The subprocess runs inside the SimSwap directory, so make the user's
    # image path independent of the working directory.
    user_face_path = os.path.abspath(image)

    male_candidate = os.path.join(male_videos, video)
    if os.path.isfile(male_candidate):
        selected_video = male_candidate
    else:
        selected_video = os.path.join(female_videos, video)

    actor_face = video_to_actor_map.get(selected_video)
    if actor_face is None:
        raise gr.Error(f"No actor face is mapped for video: {video}")

    output_video_path = "/content/zaka/SimSwap/output/output_video.mp4"
    os.makedirs(os.path.dirname(output_video_path), exist_ok=True)

    # Argument list (no shell): paths containing spaces or shell
    # metacharacters are passed through verbatim.
    command = [
        "python", "test_video_swapspecific.py",
        "--no_simswaplogo",
        "--crop_size", "224",
        "--use_mask",
        "--pic_specific_path", actor_face,
        "--name", "people",
        "--Arc_path", "arcface_model/arcface_checkpoint.tar",
        "--pic_a_path", user_face_path,
        "--video_path", selected_video,
        "--checkpoints_dir", "./checkpoints",
        "--output_path", output_video_path,
        "--temp_path", "./temp_results",
    ]

    # Run inside the SimSwap directory via ``cwd`` instead of os.chdir():
    # chdir changed the working directory of the whole process permanently,
    # so a second call could no longer find the relative 'SimSwap' directory.
    try:
        process = subprocess.run(
            command,
            cwd='SimSwap',
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except OSError as e:
        raise gr.Error(f"Error occurred during SimSwap execution: {e}") from e

    if process.returncode != 0:
        stderr_text = process.stderr.decode(errors="replace")
        raise gr.Error(f"Error occurred during SimSwap execution: {stderr_text}")

    return output_video_path  # Return the video file path for Gradio to use later

# Function to convert the uploaded audio file to .wav
def convert_audio_to_wav(audio):
    """Transcode the uploaded audio file to WAV using ffmpeg.

    Args:
        audio: Path of the input audio file.

    Returns:
        "converted_audio.wav" on success. If anything raises, a string that
        starts with "Error in conversion: " followed by the exception text.
    """
    wav_path = "converted_audio.wav"

    try:
        # Build the ffmpeg pipeline step by step, then execute it,
        # overwriting any previous output file.
        stream = ffmpeg.input(audio)
        stream = stream.output(wav_path)
        stream.run(overwrite_output=True)
    except Exception as err:
        return f"Error in conversion: {str(err)}"
    else:
        return wav_path

# Function to extract audio from video and save it as .wav
def extract_audio_from_video(video):
    """Pull the audio track out of a video and store it as a WAV file.

    Args:
        video: Path of the input video file.

    Returns:
        "extracted_audio.wav" on success. If anything raises, a string that
        starts with "Error in extraction: " followed by the exception text.
    """
    target_wav = "extracted_audio.wav"
    outcome = target_wav

    try:
        # ffmpeg writes the audio to the .wav target, replacing any
        # file left over from an earlier run.
        job = ffmpeg.input(video).output(target_wav)
        job.run(overwrite_output=True)
    except Exception as exc:
        outcome = f"Error in extraction: {str(exc)}"

    return outcome

# Function to clone the user's voice onto the extracted audio
def clone_voice_and_replace_audio(audio, video):
    """Convert the video's speech to the user's voice and remux the video.

    Steps: convert the user's audio to WAV, extract the video's audio track,
    run voice conversion (video audio as source, user audio as target voice),
    then build a new video that keeps the original picture and uses the
    converted audio.

    Args:
        audio: File path of the user's voice sample.
        video: File path of the video whose audio should be replaced.

    Returns:
        The path of the output video, "video_with_cloned_audio.mp4".

    Raises:
        gr.Error: If an input is missing or any step fails. Raising lets
            Gradio display the message instead of trying to load an error
            string as a video file.
    """
    if not audio or not video:
        raise gr.Error("Please upload an audio file and generate the SimSwap video first.")

    # Step 1: Convert audio to .wav
    converted_audio = convert_audio_to_wav(audio)

    # Step 2: Extract audio from video
    extracted_audio = extract_audio_from_video(video)

    # The two helpers report failure by returning an error message instead of
    # a path; stop here rather than feeding that message to the TTS model.
    for produced in (converted_audio, extracted_audio):
        if not os.path.isfile(produced):
            raise gr.Error(produced)

    output_cloned_audio_path = "cloned_audio.wav"  # Path for the cloned audio
    output_video_with_cloned_audio = "video_with_cloned_audio.mp4"  # Path for the output video

    try:
        # Initialize the TTS model for voice cloning
        tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)

        # Perform voice conversion using the extracted audio and user's converted voice
        tts.voice_conversion_to_file(extracted_audio, converted_audio, output_cloned_audio_path)
    except Exception as e:
        raise gr.Error(f"Error in cloning voice or replacing audio: {str(e)}") from e

    # Replace the audio in the video with the cloned audio.
    # Argument list (no shell) so paths with spaces work; "-y" overwrites the
    # output of a previous run instead of refusing to write it.
    mux_command = [
        "ffmpeg", "-y",
        "-i", video,
        "-i", output_cloned_audio_path,
        "-c:v", "copy",
        "-c:a", "aac",
        "-strict", "experimental",
        "-map", "0:v:0",
        "-map", "1:a:0",
        output_video_with_cloned_audio,
    ]
    try:
        mux = subprocess.run(mux_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as e:
        raise gr.Error(f"Error in cloning voice or replacing audio: {str(e)}") from e

    if mux.returncode != 0:
        details = mux.stderr.decode(errors="replace")
        raise gr.Error(f"Error in cloning voice or replacing audio: {details}")

    return output_video_with_cloned_audio  # Return the path of the output video

# Gradio Interface
def gradio_interface():
    """Build the Gradio Blocks UI, wire its events, and launch it.

    Layout, top to bottom: an image upload next to a video dropdown, the
    "Run SimSwap" button, the gender text box, the swapped-video player, and
    a row with the audio upload, the voice-cloning button and the final video.
    """
    with gr.Blocks() as demo:
        # First row: the user's face image and the list of selectable videos.
        with gr.Row():
            face_image = gr.Image(type="filepath", label="Upload your image")
            video_picker = gr.Dropdown(label="Select a Video", choices=[], interactive=True)

        # Triggers the face swap once a video has been chosen.
        run_button = gr.Button("Run SimSwap")

        # Shows the gender detection outcome.
        gender_box = gr.Textbox(label="Gender")

        # Plays the SimSwap result.
        swapped_video = gr.Video()

        # Any change of the image re-runs gender detection and refreshes
        # both the gender text and the dropdown choices.
        face_image.change(
            fn=detect_gender_and_show_videos,
            inputs=face_image,
            outputs=[gender_box, video_picker],
        )

        # Pressing the button runs SimSwap on the image and selected video.
        run_button.click(
            fn=run_simswap,
            inputs=[face_image, video_picker],
            outputs=swapped_video,
        )

        # Second row: voice sample upload, trigger button, and final result.
        with gr.Row():
            voice_sample = gr.Audio(type="filepath", label="Upload your audio file")
            voice_button = gr.Button("Clone Voice and Replace Audio")
            dubbed_video = gr.Video(label="Final Video with Cloned Voice")

        # Replace the audio of the SimSwap video with the converted voice.
        voice_button.click(
            fn=clone_voice_and_replace_audio,
            inputs=[voice_sample, swapped_video],
            outputs=dubbed_video,
        )
    demo.launch()

# Launch the Gradio Interface
gradio_interface()
