# Projeto Final

## Instalação de Pacotes

In [1]:
!pip install gradio

Collecting gradio
  Downloading gradio-4.19.2-py3-none-any.whl (16.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.9/16.9 MB[0m [31m83.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.110.0-py3-none-any.whl (92 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.1/92.1 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==0.10.1 (from gradio)
  Downloading gradio_client-0.10.1-py3-none-any.whl (307 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.9/307.9 kB[0m [31m31.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━

In [2]:
!pip install -q -U google-generativeai

In [3]:
!pip install --upgrade pip
!pip install --upgrade transformers scipy

Collecting pip
  Downloading pip-24.0-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
Successfully installed pip-24.0
Collecting transformers
  Downloading transformers-4.38.1-py3-none-any.whl.metadata (131 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.1/131.1 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Collecting scipy
  Downloading scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.38.1-py3-none-any.whl (8.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m47.8 M

In [None]:
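# Disabled: installing audiocraft from source is kept only for reference. Nothing in the cells below
# imports it (MusicGen is loaded through the transformers pipeline instead).
#!python3 -m pip install -U git+https://github.com/facebookresearch/audiocraft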

Collecting git+https://github.com/facebookresearch/audiocraft
  Cloning https://github.com/facebookresearch/audiocraft to /tmp/pip-req-build-9yr0iyaa
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/audiocraft /tmp/pip-req-build-9yr0iyaa
  Resolved https://github.com/facebookresearch/audiocraft to commit 69fea8b290ad1b4b40d28f92d1dfc0ab01dbab85
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting av (from audiocraft==1.3.0a1)
  Downloading av-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (32.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einops (from audiocraft==1.3.0a1)
  Downloading einops-0.7.0-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting flashy>=0.0.1 (from audiocraft==1.3.0a1)
  Downloading flashy-0

## Importação de Pacotes

In [4]:
import torch
import numpy
import pathlib
import textwrap
import PIL.Image
import gradio as gr
from google.colab import userdata
import google.generativeai as genai
from IPython.display import display
from IPython.display import Markdown

import scipy
from transformers import pipeline

## Implementação do App

In [5]:
# Read the Gemini API key from Colab's `userdata` store (entry named "GEMINI_API_KEY"),
# so the key itself never appears in the notebook source.
GEMINI_API_KEY = userdata.get("GEMINI_API_KEY")

# Hand the key to the google.generativeai client library.
genai.configure(api_key=GEMINI_API_KEY)

In [6]:
class MusicGenWrapper:
    """Wrapper class for interacting with the MusicGen pre-trained model."""

    def __init__(self):
        """
        Builds the "text-to-audio" pipeline for the "facebook/musicgen-small" checkpoint
        and stores it on `self.synthesizer`.
        """

        self.synthesizer = pipeline("text-to-audio", "facebook/musicgen-small")

    def text_to_song(self, track_description, track_file_path="track_1.wav", max_new_tokens=None):
        """
        Generates an audio track from a short text description and writes it to disk as a '.wav' file.

        Args:
        track_description: The simple description of the audio track to be generated, as a string.
        track_file_path: Where the '.wav' file is written. Defaults to "track_1.wav", which is
            resolved against the current working directory.
        max_new_tokens: Optional integer forwarded to the pipeline's `forward_params` to control
            the length of the generated audio. When None (the default) it is not sent, so the
            model's own default length is used.

        Returns:
        The path of the generated '.wav' file (the value of `track_file_path`).
        """

        # Imported explicitly: a bare `import scipy` is not guaranteed to expose `scipy.io.wavfile`
        # on every SciPy version.
        from scipy.io import wavfile

        # Author's note: with the default length, the generated audio lasts about 30 seconds and
        # takes about 6 minutes to produce on a T4 GPU. Pass `max_new_tokens` to experiment with
        # shorter (faster) or longer tracks.
        forward_params = {"do_sample": True}
        if max_new_tokens is not None:
            forward_params["max_new_tokens"] = max_new_tokens

        track = self.synthesizer(track_description, forward_params=forward_params)
        wavfile.write(track_file_path, rate=track["sampling_rate"], data=track["audio"])

        return track_file_path

In [7]:
class GeminiWrapper:
    """Wrapper class for interacting with the Google Gemini API."""

    def __init__(self):
        """
        Creates the two Gemini model handles used by this wrapper: "gemini-pro-vision" for
        image inputs and "gemini-pro" for text-only inputs.

        The API key is not taken here; the notebook sets it globally through
        `genai.configure(...)` before instantiating this class.
        """

        self.image_to_text_model = genai.GenerativeModel("gemini-pro-vision")
        self.text_to_text_model = genai.GenerativeModel("gemini-pro")

    def image_to_text(self, image):
        """
        Gets a detailed description of a given image using the Gemini API.

        Args:
        image: The image to describe, as an array accepted by `PIL.Image.fromarray`
            (for example a NumPy array).

        Returns:
        The text of the model's response, as a string.

        Raises:
        ValueError: If `image` is None.
        Exception: If the API request fails.
        """

        # Fail with a clear message instead of an opaque error from inside PIL.
        if image is None:
            raise ValueError("image must not be None")

        pil_image = PIL.Image.fromarray(image)
        description_prompt = """
          Please, provide a very detailed description with at least 200 words, in English,
          about the image that is being sent."""
        response = self.image_to_text_model.generate_content([description_prompt, pil_image])
        return response.text

    def text_to_sentiment(self, image_description):
        """
        Gets the general vibe or sentiment of a given image description as text by using the Gemini API.

        Args:
        image_description: An image description as a string. It is appended to the instruction
            prompt after a ':' separator.

        Returns:
        The text of the model's response, as a string. The prompt asks for a single sentence
        about the feelings/sentiments/vibes evoked by the description.

        Raises:
        Exception: If the API request fails.
        """

        vibe_sentiment_prompt = """
        In this request, I am providing an image description to you after the character ':'.
        As a response, I want you to provide to me a possible brief description of the vibes, sentiments and feelings (in English!)
        that one person could get by reading such image description. However, I don't want you to put descriptive elements of the image in your response.
        I want you to focus only on the feelings, sentiments and vibes, as I've already mentioned. Also make sure to not use a bulletpoints list or something like that.
        Instead, describe such a thing with a single sentence"""
        complete_prompt = f"{vibe_sentiment_prompt}: {image_description}"
        response = self.text_to_text_model.generate_content(complete_prompt)
        return response.text

    def sentiment_to_image_vertex(self, artist_band_name, single_vibe_sentiment):
        """
        Placeholder for the Vertex AI cover generation; not implemented yet.

        Args:
        artist_band_name: Name of the artist/band, as a string (currently unused).
        single_vibe_sentiment: Sentiment text for the Single, as a string (currently unused).

        Returns:
        Always None.
        """

        # TODO: build the image-generation prompt and call the image model.
        return None

    def sentiment_to_image_stable_diffusion(self, artist_band_name, single_vibe_sentiment):
        """
        Placeholder for the Stable Diffusion cover generation; not implemented yet.

        Args:
        artist_band_name: Name of the artist/band, as a string (currently unused).
        single_vibe_sentiment: Sentiment text for the Single, as a string (currently unused).

        Returns:
        Always None.
        """

        # TODO: build the image-generation prompt and call the image model.
        return None

In [13]:
"""
If your function accepts more than one argument, as is the case above, pass a list of input components to inputs,
with each input component corresponding to one of the arguments of the function, in order.
The same holds true if your function returns more than one value: simply pass in a list of components to outputs.
"""

# Module-level wrapper instances shared by the Gradio callback defined below.
# Evaluated left to right: the Gemini wrapper first, then the MusicGen wrapper.
gemini, music_gen = GeminiWrapper(), MusicGenWrapper()

def get_num_tracks(artist_band_name, single_vibe_image, single_description):
    """
    Gradio callback: describes the uploaded image, infers its vibe/sentiment and returns
    one value per output component of the interface.

    Args:
    artist_band_name: Name of the artist/band, as typed by the user.
    single_vibe_image: The uploaded image; None when the user did not upload one.
    single_description: Musical description of the Single. Currently unused because
        track generation is switched off (see the commented-out call below).

    Returns:
    A list of seven values, in the order of the interface outputs: artist/band name,
    Single name (None for now), image description, inferred sentiment, the two cover
    images (whatever the cover-generation methods return), and the path of the audio
    file ("track_1.wav" when that file exists, otherwise None).

    Raises:
    gr.Error: If no image was provided.
    """

    if single_vibe_image is None:
        # Show a readable message in the UI instead of failing deep inside the image conversion.
        raise gr.Error("Envie uma imagem que represente a 'vibe' do Single.")

    image_description = gemini.image_to_text(single_vibe_image)
    image_vibe_sentiment = gemini.text_to_sentiment(image_description)

    album_cover_vertex = gemini.sentiment_to_image_vertex(artist_band_name, image_vibe_sentiment)
    album_cover_stable_diffusion = gemini.sentiment_to_image_stable_diffusion(artist_band_name, image_vibe_sentiment)

    # Track generation is switched off for now; a previously generated file is reused when present.
    # track_file_path = music_gen.text_to_song(single_description)
    track_file_path = "track_1.wav"
    if not pathlib.Path(track_file_path).exists():
        # Do not hand the audio output a path to a file that does not exist.
        track_file_path = None

    return [
        artist_band_name,
        None,  # Single name: not generated yet.
        image_description,
        image_vibe_sentiment,
        album_cover_vertex,
        album_cover_stable_diffusion,
        track_file_path,
    ]

# gr.Interface wraps a function with a web UI: one input component per function argument and
# one output component per returned value, in order.
apollo_demo = gr.Interface(
    description="""
    # Apollo

    ### O que é?
    * Apollo é uma ferramenta simples para geração de Singles (ou melhor dizendo, 'sneak peeks' de 30 segundos de um Single) que segue as instruções fornecidas pelo usuário.
    * Basta fornecer algumas entradas específicas, explicadas abaixo, e, ao final do processo, você obterá o Single como resultado e poderá baixá-lo.

    ### Entradas
    * __Nome do artista/banda responsável pelo Single:__ Aqui, o usuário pode escolher livremente o nome do artista/banda que irá 'compor' o Single em questão.
    * __Imagem que representa a vibe do Single:__ Uma imagem que será usada para extrair o "sentimento geral" que o Single deseja transmitir para o público.
    * __Descrição musical do Single:__ Uma descrição textual individual a respeito do Single a ser gerado. Aqui, o usuário tem total liberdade para descrever como quer que uma faixa específica do álbum seja. O usuário pode descrever o uso de instrumentos musicais, a maneira como os intrumentos devem ser tocados, a intensidade e ritmo de cada um. Além disso, pode citar influências de gêneros musicais, artistas, bandas. O limite é a sua imaginação! E, lembre-se, quanto mais detalhado, melhor!

    ### Saídas
    * __Nome do Artista:__ O nome do artista/banda que produziu o Single gerado, escolhido cuidadosamente pelo deus Apollo pessoalmente.
    * __Nome do Single:__ O nome do Single gerado, escolhido cuidadosamente pelo deus Apollo pessoalmente.
    * __Capa do Single:__ A capa do Single gerado, ilustrada exclusivamente pelo próprio Apollo.
    * __Single:__ O arquivo do Single gerado no formato '.wav' disponível para download.

    """,
    fn=get_num_tracks,  # The callback wrapped by the UI; it receives the three inputs below, in order.
    inputs=[
        gr.Textbox(lines=1, max_lines=1, label="Nome do artista/banda responsável pelo Single"),
        gr.Image(label="Imagem que representa a 'vibe' do Single"),
        gr.Textbox(lines=3, max_lines=10, label="Descrição musical do Single"),
    ],
    outputs=[
        gr.Textbox(lines=1, max_lines=1, label="Nome do artista/banda"),
        gr.Textbox(lines=1, max_lines=1, label="Nome do Single gerado"),
        gr.Textbox(lines=3, max_lines=10, label="Descrição da imagem de entrada"),
        gr.Textbox(lines=3, max_lines=10, label="Sentimentos/Emoções/Vibes inferidas da descrição da imagem de entrada"),
        gr.Image(label="Capa do Single (Vertex AI)"),
        gr.Image(label="Capa do Single (Stable Diffusion)"),
        # Preload a track left by a previous run only when the file is really there; on a fresh
        # runtime "track_1.wav" does not exist yet and must not be used as the initial value.
        gr.Audio(
            value="track_1.wav" if pathlib.Path("track_1.wav").exists() else None,
            label="Single",
        ),
    ],
    allow_flagging="never",
    clear_btn=gr.Button(visible=False),
    submit_btn=gr.Button(value="Gerar"),
)


# debug=True keeps the cell running so errors and logs stay visible; share=True requests a public link.
apollo_demo.launch(debug=True, share=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://5b727162e2c9ef05cc.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://5b727162e2c9ef05cc.gradio.live


