In [None]:
import os
import warnings

# API token for thenewsapi.com. It is read from the NEWS_API environment
# variable so that the secret is never stored in the notebook itself.
# NOTE(review): a token was previously committed here in plain text; it should
# be treated as leaked and rotated.
NEWS_API = os.environ.get('NEWS_API', '')
if not NEWS_API:
    warnings.warn(
        "NEWS_API environment variable is not set; news requests will be rejected."
    )

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking configuration: pieces of up to 512 characters (measured with len)
# that overlap by 20 characters; separators are matched as literal strings.
_splitter_options = dict(
    chunk_size=512,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)
text_splitter = RecursiveCharacterTextSplitter(**_splitter_options)

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

# Hugging Face embedding model, selected by its hub identifier.
_embedding_model_name = "BAAI/bge-large-en"
embeddings = HuggingFaceEmbeddings(model_name=_embedding_model_name)

In [None]:
from langchain_core.prompts import PromptTemplate

# Plain PromptTemplate with two placeholders: the retrieved context and the
# user's question.
template = """Answer the following question from the context
    context = {context}
    question = {question}
"""
prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)

In [None]:
import requests
from datetime import datetime, timedelta

def get_top_news(locale='in', limit=3, timeout=10):
    """Fetch top headlines published during the last 24 hours from TheNewsAPI.

    Args:
        locale: Value sent as the ``locale`` query parameter (default ``'in'``).
        limit: Maximum number of articles to request (default 3).
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the caller forever.

    Returns:
        The decoded JSON payload when the API answers with HTTP 200; otherwise
        the raw response body as a string. Callers must check which one they
        received before indexing into the result.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    # NOTE(review): datetime.now() is local time; confirm whether the API
    # expects `published_after` in UTC.
    one_day_ago = datetime.now() - timedelta(days=1)
    published_after_time = one_day_ago.strftime("%Y-%m-%dT%H:%M:%S")

    url = "https://api.thenewsapi.com/v1/news/top"
    params = {
        'api_token': NEWS_API,
        'locale': locale,
        'limit': limit,
        'published_after': published_after_time
    }

    response = requests.get(url, params=params, timeout=timeout)

    if response.status_code == 200:
        return response.json()
    # Non-200: hand back the error body so the caller can surface it.
    return response.text



In [None]:
import requests
from bs4 import BeautifulSoup
from langchain.docstore.document import Document
from langchain_community.vectorstores import FAISS

def scrape_news_data():
    """Download the current top news articles and index them for retrieval.

    Calls ``get_top_news()``, fetches each article URL, extracts the page
    title and the text of all ``<p>`` elements, splits the result with
    ``text_splitter`` and stores it in a FAISS index built with ``embeddings``.
    On success the module-level ``retriever`` is (re)assigned.

    Returns:
        A status string: ``'News Collected Successfully'`` on success, or a
        message describing why no news could be collected.
    """
    global retriever

    news_data = get_top_news()
    # get_top_news() returns the raw error body (a str) when the API call
    # fails, so make sure we really have a payload with a 'data' list.
    if not isinstance(news_data, dict) or 'data' not in news_data:
        return f'Failed to fetch news: {news_data}'

    scraped_data = []
    for article in news_data['data']:
        url = article.get('url')
        if not url:
            continue

        # One unreachable or slow site must not abort the whole collection.
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Skipping {url}: {exc}")
            continue

        soup = BeautifulSoup(response.content, 'html.parser')

        # Some pages have no <title>; fall back to the title from the API.
        title_tag = soup.find('title')
        if title_tag is not None:
            title = title_tag.text
        else:
            title = article.get('title') or ''

        # Concatenate every paragraph, one per line.
        article_text = ''.join(paragraph.text + '\n' for paragraph in soup.find_all('p'))
        if not article_text.strip():
            continue

        scraped_data.append(
            Document(page_content=article_text, metadata={'title': title})
        )

    # Building a FAISS index from zero documents fails, so stop early.
    chunks = text_splitter.split_documents(scraped_data)
    if not chunks:
        return 'No news articles could be collected'

    vectorstore = FAISS.from_documents(chunks, embeddings)
    retriever = vectorstore.as_retriever(search_kwargs={'k': 10})

    return 'News Collected Successfully'



In [None]:
def get_context(message, retriever):
    """Build the context string for a query from the retrieved documents.

    Each document returned by ``retriever.get_relevant_documents(message)``
    contributes one entry made of its ``title`` metadata and its page content;
    entries are joined with newlines.
    """
    entries = []
    for hit in retriever.get_relevant_documents(message):
        entries.append("title: " + hit.metadata["title"] + "news: " + hit.page_content)
    return "\n".join(entries)


In [None]:
from langchain_community.llms import Ollama

def respond_to_query(query):
    """Answer a user question from the indexed news using the llama3 model.

    Args:
        query: The user's question.

    Returns:
        The model's answer, or a short instruction message when no news has
        been indexed yet (the module-level ``retriever`` is only created by
        ``scrape_news_data``).
    """
    # `retriever` does not exist until the news has been fetched; look it up
    # defensively instead of failing with a NameError.
    active_retriever = globals().get("retriever")
    if active_retriever is None:
        return "Please fetch the latest news before asking a question."

    context = get_context(query, active_retriever)
    llm = Ollama(model="llama3")

    return llm.invoke(prompt.format(question=query, context=context))

In [None]:
import subprocess
import TTS

def run_tts_command(text, out_path='/home/vardh/ai-news-avatar/Wav2Lip/output.wav'):
    """Synthesize speech for ``text`` by invoking the ``tts`` command-line tool.

    Args:
        text: The text to speak. Empty or whitespace-only text is skipped.
        out_path: Where the generated WAV file is written.

    Returns:
        ``out_path`` on success, or ``None`` when there is nothing to speak,
        the ``tts`` executable is missing, or the command exits with an error.
    """
    # Nothing to say: do not spend time launching the TTS model.
    if not text or not text.strip():
        print("No text provided for TTS; skipping audio generation.")
        return None

    # Arguments are passed as a list, so no shell quoting is involved.
    command = [
        'tts',
        '--text', text,
        '--model_name', 'tts_models/multilingual/multi-dataset/xtts_v2',
        '--vocoder_name', 'vocoder_models/universal/libri-tts/wavegrad',
        '--out_path', out_path,
        '--speaker_idx', 'Brenda Stern',
        '--language_idx', 'en'
    ]

    try:
        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        print("An error occurred: the 'tts' executable was not found on PATH")
        return None
    except subprocess.CalledProcessError as e:
        print(f"An error occurred: {e.stderr.decode(errors='replace')}")
        return None

    print(f"TTS generation successful, output saved to '{out_path}'")
    return out_path



In [None]:
import os
import subprocess

def run_wav2lip_command(wav2lip_dir='/home/vardh/ai-news-avatar/Wav2Lip'):
    """Run Wav2Lip inference to lip-sync ``face.mp4`` with ``output.wav``.

    The command is executed with ``wav2lip_dir`` as its working directory, so
    the relative paths in the command resolve inside the Wav2Lip checkout
    without changing the working directory of the calling process.

    Args:
        wav2lip_dir: Path of the Wav2Lip checkout containing ``inference.py``.

    Returns:
        Path of the generated video (``results/result_voice.mp4`` inside
        ``wav2lip_dir``), or ``None`` when the directory does not exist or the
        inference command fails.
    """
    if not os.path.isdir(wav2lip_dir):
        print(f"Wav2Lip directory not found: {wav2lip_dir}")
        return None

    command = [
        'python', 'inference.py',
        '--checkpoint_path', 'checkpoints/wav2lip.pth',
        '--face', 'face.mp4',
        '--audio', 'output.wav'
    ]

    # check=True turns a non-zero exit status into an exception, so a stale
    # video from an earlier run is never reported as a fresh result.
    try:
        subprocess.run(command, cwd=wav2lip_dir, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        print(f"Wav2Lip inference failed: {e}")
        return None

    return os.path.join(wav2lip_dir, 'results', 'result_voice.mp4')



In [None]:
# Download MobileNet from here: https://drive.google.com/drive/folders/1oZRSG0ZegbVkVwUd8wUIQx8W7yfZ_ki1

In [36]:
import gradio as gr

# Gradio UI: a button to fetch news, a question box, and text / audio / video
# outputs for the generated answer.
with gr.Blocks() as demo:
    # Row 1: trigger news collection and show its status message.
    with gr.Row():
        btn = gr.Button("Fetch Latest News")
        response = gr.Text()
    # Row 2: user question and the textual answer.
    with gr.Row():
        query = gr.Textbox(label= "Ask me about the news")
        news_text = gr.Textbox(label= "Response")
    # Row 3: spoken answer and the lip-synced video.
    with gr.Row():
        news_audio = gr.Audio(label= 'Audio Response', type= 'filepath')
        news_video = gr.Video(label= 'Lip Synced Video')


    # Clicking the button scrapes the news; the returned status string is shown in `response`.
    btn.click(fn= scrape_news_data, inputs= None, outputs= response)
    # Submitting a question starts a chain of events: answer text -> TTS audio -> Wav2Lip video.
    news_query = query.submit(fn= respond_to_query, inputs= query, outputs= news_text)
    # The TTS step reads the answer text from the `news_text` box.
    audio_query = news_query.then(fn= run_tts_command, inputs= news_text , outputs= news_audio)
    # The video step takes no inputs; run_wav2lip_command uses a fixed audio file name.
    audio_query.then(fn= run_wav2lip_command, inputs= None, outputs= news_video)

# NOTE(review): server_name='0.0.0.0' listens on all network interfaces, so the
# app is reachable from other machines — confirm this exposure is intended.
demo.launch(server_name='0.0.0.0')

Running on local URL:  http://0.0.0.0:7861

To create a public link, set `share=True` in `launch()`.




TTS generation successful, output saved to 'output.wav'


Traceback (most recent call last):
  File "/home/vardh/.local/lib/python3.10/site-packages/gradio/queueing.py", line 528, in process_events
    response = await route_utils.call_process_api(
  File "/home/vardh/.local/lib/python3.10/site-packages/gradio/route_utils.py", line 270, in call_process_api
    output = await app.get_blocks().process_api(
  File "/home/vardh/.local/lib/python3.10/site-packages/gradio/blocks.py", line 1908, in process_api
    result = await self.call_function(
  File "/home/vardh/.local/lib/python3.10/site-packages/gradio/blocks.py", line 1485, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "/opt/conda/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/opt/conda/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2134, in run_sync_in_worker_thread
    return await future
  File "/opt/conda/lib/python3.10/site-packages/anyio/_backend