In [None]:
import os
import warnings

# SECURITY: API tokens must not be stored in the notebook. Export the token in
# the environment before starting the kernel (e.g. `export NEWS_API=...`).
# Any token that was ever committed with this notebook should be revoked.
NEWS_API = os.environ.get("NEWS_API", "")
if not NEWS_API:
    warnings.warn(
        "NEWS_API environment variable is not set; requests to the news API "
        "will be rejected until a token is provided."
    )

In [22]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter used before indexing: chunks are measured with `len` (characters),
# capped at 512 with a 20-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    is_separator_regex=False,
    length_function=len,
    chunk_overlap=20,
    chunk_size=512,
)

In [23]:
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model used to vectorise article chunks and queries.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en")

In [46]:
from TTS.api import TTS
import torch

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the XTTS v2 model and move it onto the selected device.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

OSError: /home/vardh/.local/lib/python3.10/site-packages/torchaudio/lib/libtorchaudio.so: undefined symbol: _ZN3c104cuda14ExchangeDeviceEa

In [24]:
from langchain_core.prompts import PromptTemplate

# Prompt used for question answering: the retrieved news text is substituted
# for {context} and the user's query for {question}.
template = """Answer the following question from the context
    context = {context}
    question = {question}
"""
prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)

In [None]:
import requests
from datetime import datetime, timedelta

def get_top_news(locale='in', limit=3, days=1, timeout=10):
    """Fetch the top news stories published within the last `days` days.

    Args:
        locale: Value sent as the API's `locale` query parameter.
        limit: Maximum number of articles to request.
        days: Size of the look-back window, in days, counted from now.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the kernel indefinitely.

    Returns:
        The decoded JSON payload when the API answers with HTTP 200.
        For any other status code the raw response body is returned as a
        string, so callers should check the type before indexing into it.

    Raises:
        requests.RequestException: on connection errors or timeouts.
    """
    # NOTE(review): uses the local clock; confirm whether the API expects UTC.
    window_start = datetime.now() - timedelta(days=days)
    published_after_time = window_start.strftime("%Y-%m-%dT%H:%M:%S")

    url = "https://api.thenewsapi.com/v1/news/top"
    params = {
        'api_token': NEWS_API,
        'locale': locale,
        'limit': limit,
        'published_after': published_after_time
    }

    response = requests.get(url, params=params, timeout=timeout)

    if response.status_code == 200:
        return response.json()
    # Non-200: hand back the body text so the caller can see the API error.
    return response.text



In [None]:
# Example usage: fetch the latest headlines and display the raw API payload.
# The assignment is required; without it `news_data` is undefined on a fresh kernel.
news_data = get_top_news()
news_data

In [38]:
import requests
from bs4 import BeautifulSoup
from langchain.docstore.document import Document
from langchain_community.vectorstores import FAISS

def scrape_news_data(news_data, timeout=10):
    """Download each article in `news_data`, index its text and return a retriever.

    Args:
        news_data: News API payload; must be a dict with a 'data' list whose
            items each carry a 'url' key.
        timeout: Seconds to wait for each article download.

    Returns:
        A retriever over a FAISS index of the chunked article text that
        returns the 10 closest chunks per query.

    Raises:
        ValueError: if `news_data` is not a payload with a 'data' list (for
            example an error string), or if no article text could be scraped.
    """
    if not isinstance(news_data, dict) or 'data' not in news_data:
        raise ValueError(
            f"Expected a news API payload with a 'data' list, got: {news_data!r}"
        )

    scraped_data = []

    for article in news_data['data']:
        url = article['url']
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as exc:
            # One unreachable or blocked site should not abort the whole run.
            print(f"Skipping {url}: {exc}")
            continue

        soup = BeautifulSoup(response.content, 'html.parser')

        # Some pages have no <title>; fall back to the API's title, then the URL.
        title_tag = soup.find('title')
        if title_tag is not None:
            title = title_tag.text
        else:
            title = article.get('title') or url

        # Article body: every <p> element's text, one paragraph per line.
        article_text = ''.join(
            paragraph.text + '\n' for paragraph in soup.find_all('p')
        )

        scraped_data.append(
            Document(page_content=article_text, metadata={'title': title})
        )

    chunks = text_splitter.split_documents(scraped_data)
    if not chunks:
        raise ValueError(
            "No article text could be scraped; cannot build the vector store."
        )

    vectorstore = FAISS.from_documents(chunks, embeddings)

    return vectorstore.as_retriever(search_kwargs={'k': 10})



In [43]:
def get_context(message, retriever):
    """Build the context string passed to the LLM for `message`.

    Args:
        message: The user's query, forwarded to the retriever.
        retriever: Object exposing `get_relevant_documents(message)` that
            returns documents with `metadata` (dict) and `page_content` (str).

    Returns:
        One "title: ...\\nnews: ..." entry per retrieved document, joined by
        newlines. Returns an empty string when nothing is retrieved.
    """
    documents = retriever.get_relevant_documents(message)
    # Keep the title and body on separate lines so the title does not run
    # straight into the "news:" label; tolerate documents without a title.
    return "\n".join(
        f"title: {doc.metadata.get('title', '')}\nnews: {doc.page_content}"
        for doc in documents
    )


In [39]:
from langchain_community.llms import Ollama

def respond_to_query(query, retriever):
    """Answer `query` with the llama3 Ollama model, using retrieved news as context.

    The text returned by `get_context` fills the module-level `prompt`
    together with the query, and the model's reply is returned.
    """
    retrieved_context = get_context(query, retriever)
    model = Ollama(model="llama3")
    filled_prompt = prompt.format(context=retrieved_context, question=query)
    return model.invoke(filled_prompt)

In [47]:
import subprocess

def run_tts_command(text, out_path='Wav2Lip/output.wav'):
    """Synthesise `text` to a WAV file by running the `tts` command-line tool.

    Args:
        text: The sentence(s) to speak.
        out_path: Where the generated audio is written. The default places it
            inside the Wav2Lip directory.

    Returns:
        The captured standard output of the `tts` process as bytes, or None
        if the executable is missing or exits with a non-zero status.
    """
    command = [
        'tts',  # Command executable
        '--text', text,  # Text for TTS
        '--model_name', 'tts_models/multilingual/multi-dataset/xtts_v2',  # Model name
        '--vocoder_name', 'vocoder_models/universal/libri-tts/wavegrad',  # Vocoder name
        '--out_path', out_path,  # Output path
        '--speaker_idx', 'Brenda Stern',  # Speaker index
        '--language_idx', 'en'  # Language index
    ]

    try:
        result = subprocess.run(
            command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except FileNotFoundError:
        # The `tts` executable is not on PATH (package not installed).
        print("An error occurred: the 'tts' executable was not found on PATH")
        return None
    except subprocess.CalledProcessError as e:
        print(f"An error occurred: {e.stderr.decode(errors='replace')}")
        return None

    # Report the path actually written, not a hard-coded file name.
    print(f"TTS generation successful, output saved to '{out_path}'")
    return result.stdout

# Smoke test: synthesise one short sentence with the TTS helper.
text_to_speak = "Hello, this is a test of the text-to-speech command."
run_tts_command(text=text_to_speak)

TTS generation successful, output saved to 'output.wav'


b" > tts_models/multilingual/multi-dataset/xtts_v2 is already downloaded.\n > Using model: xtts\n > Text: Hello, this is a test of the text-to-speech command.\n > Text splitted to sentences.\n['Hello, this is a test of the text-to-speech command.']\n > Processing time: 3.651618003845215\n > Real-time factor: 0.9646130077725105\n > Saving output to output.wav\n"

In [51]:
import os
import subprocess

def run_wav2lip_command():
    """Run Wav2Lip inference to lip-sync `face.mp4` to `output.wav`.

    The script runs with the Wav2Lip directory as its working directory, so
    the relative checkpoint, face and audio paths resolve inside it. The
    kernel's own working directory is left untouched, which keeps the function
    safe to call repeatedly.

    Returns:
        The captured standard output of the inference process as bytes, or
        None if the process could not be started or exited with an error.
    """
    import sys  # local import: use the kernel's interpreter for the subprocess

    wav2lip_dir = 'Wav2Lip'

    command = [
        sys.executable or 'python', 'inference.py',
        '--checkpoint_path', 'checkpoints/wav2lip.pth',
        '--face', 'face.mp4',
        '--audio', 'output.wav'
    ]

    try:
        # `cwd=` scopes the directory change to the child process only;
        # os.chdir would change it for the whole kernel.
        result = subprocess.run(
            command,
            cwd=wav2lip_dir,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except FileNotFoundError as e:
        # Raised when the Wav2Lip directory or the interpreter is missing.
        print(f"An error occurred: {e}")
        return None
    except subprocess.CalledProcessError as e:
        print(f"An error occurred: {e.stderr.decode(errors='replace')}")
        return None

    return result.stdout



In [None]:
# Download MobileNet from here: https://drive.google.com/drive/folders/1oZRSG0ZegbVkVwUd8wUIQx8W7yfZ_ki1