# Installation

In [None]:
# clone nemo
!git clone https://github.com/AI4Bharat/NeMo.git -b nemo-v2

# conda install -c nvidia cuda-nvprof=12.2 # Cuda version
!pip install packaging
!pip install huggingface_hub==0.23.2

# install NeMo
%cd NeMo
!bash reinstall.sh

# Necessary imports

In [2]:
import torch
import soundfile as sf
import nemo.collections.asr as nemo_asr
import IPython

      def forward(ctx, input, weight, bias, gradient_accumulation_fusion,
    
      def backward(ctx, grad_output):
    


In [None]:
!pip install langchain_google_genai
!pip install langchain_community

In [7]:
import os
import soundfile as sf
import nemo.collections.asr as nemo_asr
import torch
import torchaudio
from langchain_google_genai import ChatGoogleGenerativeAI
import warnings
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import numpy as np
import datetime
from typing import List, Dict
from IPython.display import Audio, display
from google.colab import output
warnings.filterwarnings('ignore')

class SandalwoodQASystem:
    def __init__(self, csv_path, kannada_asr_path):
        """
        Initialize the QA system for Sandalwood news with text and audio capabilities.

        Args:
            csv_path: Path to the CSV file containing transcripts and translations
            kannada_asr_path: Path to the Kannada ASR model
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Initialize Kannada ASR model
        self.kannada_asr = nemo_asr.models.EncDecCTCModel.restore_from(
            restore_path=kannada_asr_path
        )
        self.kannada_asr.freeze()
        self.kannada_asr = self.kannada_asr.to(self.device)

        # Initialize translator
        os.environ["GOOGLE_API_KEY"] = "AIzaSyD7eJyGM-Twi4Z-XUVdvJ_rGnPcJcFbgR8"
        self.translator = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.7)

        # Initialize RAG components
        self.initialize_rag_system(csv_path)

        # Audio parameters
        self.sample_rate = 16000
        self.recording_duration = 10  # seconds

        # Load and cache translations
        self.audio_translations = {}
        self.load_translations(csv_path)

    def initialize_rag_system(self, csv_path: str):
        try:
            # Read CSV file
            df = pd.read_csv(csv_path)

            # Prepare documents for embedding
            documents = []
            for _, row in df.iterrows():
                doc_content = {
                    'source': row['audio_file'],
                    'content': {
                        'english': row['full_transcript_translation'],  # Changed from english_translation
                        'kannada': row['full_transcript'],             # Changed from kannada_transcript
                        'time_aligned': {
                            'english': row['time_aligned_translations'],  # Changed from time_aligned_english
                            'kannada': row['time_aligned_transcripts']   # Changed from time_aligned_kannada
                        }
                    }
                }
                documents.append(str(doc_content))

            # Initialize text splitter
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000,
                chunk_overlap=200,
                length_function=len
            )

            # Split documents
            texts = text_splitter.create_documents(documents)

            # Initialize embeddings
            embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-mpnet-base-v2"
            )

            # Create vector store
            self.vector_store = FAISS.from_documents(texts, embeddings)

        except Exception as e:
            print(f"Error initializing RAG system: {str(e)}")
            raise

    def load_translations(self, csv_path: str):
        try:
            df = pd.read_csv(csv_path)
            for _, row in df.iterrows():
                audio_file = os.path.basename(row['audio_file'])
                self.audio_translations[audio_file] = {
                    'content': {
                        'english': row['full_transcript_translation'],  # Changed from english_translation
                        'kannada': row['full_transcript'],             # Changed from kannada_transcript
                        'time_aligned': {
                            'english': row['time_aligned_translations'],  # Changed from time_aligned_english
                            'kannada': row['time_aligned_transcripts']   # Changed from time_aligned_kannada
                        }
                    }
                }
        except Exception as e:
            print(f"Error loading translations: {str(e)}")

    def get_audio_translation(self, audio_filename: str) -> Dict:
        """Get translation for an audio file if it exists."""
        return self.audio_translations.get(audio_filename, None)

    def _is_kannada(self, text: str) -> bool:
        """Check if text contains Kannada characters."""
        kannada_range = range(0x0C80, 0x0CFF + 1)
        return any(ord(char) in kannada_range for char in text)

    def translate_to_english(self, kannada_text: str) -> str:
        """Translate Kannada text to English using Google's Gemini model."""
        try:
            response = self.translator.invoke(
                f"Translate the following Kannada text to English: {kannada_text}"
            )
            return response.content
        except Exception as e:
            print(f"Error in translation: {str(e)}")
            return ""

    def transcribe_audio(self, audio_path: str) -> str:
        """Transcribe Kannada audio to text using NeMo ASR model."""
        try:
            # Load and preprocess audio
            audio, sample_rate = sf.read(audio_path, dtype='float64')  # Specify dtype as float64/double

            # Resample if necessary
            if sample_rate != self.sample_rate:
                resampler = torchaudio.transforms.Resample(
                    orig_freq=sample_rate,
                    new_freq=self.sample_rate
                )
                audio = resampler(torch.tensor(audio, dtype=torch.float64)).numpy()

            # Convert to single channel if stereo
            if len(audio.shape) > 1:
                audio = audio.mean(axis=1)

            # Ensure audio is in the correct format for NeMo
            audio = audio.astype('float64')

            # Transcribe
            with torch.no_grad():
                transcription = self.kannada_asr.transcribe(
                    paths2audio_files=[audio_path]
                )[0]

            return transcription

        except Exception as e:
            print(f"Error transcribing audio: {str(e)}")
            return ""

    def record_audio_colab(self):
        """Record audio using Colab's audio recorder widget."""
        try:
            # Create recordings directory if it doesn't exist
            os.makedirs("recordings", exist_ok=True)

            # Generate unique filename
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            output_path = f"recordings/recording_{timestamp}.wav"

            print("Recording... Click the microphone icon below to start/stop recording")

            # Display audio recorder widget
            audio_recorder = output.eval_js('''
                new Promise((resolve) => {
                    const recorder = new AudioRecorder();
                    recorder.start()
                        .then(() => {
                            setTimeout(() => {
                                recorder.stop()
                                    .then(audio => resolve(audio));
                            }, 10000);  // 10 seconds recording
                        });
                });
            ''')

            if audio_recorder:
                # Save the recorded audio
                audio_data = np.frombuffer(audio_recorder, dtype=np.float32)
                sf.write(output_path, audio_data, self.sample_rate)
                print("\nRecording saved successfully!")
                return output_path
            else:
                print("\nNo audio recorded")
                return None

        except Exception as e:
            print(f"Error recording audio: {str(e)}")
            return None

    def extract_time_range(self, time_aligned_str):
        """Extract time range from time-aligned transcript string."""
        try:
            # Assuming format like "[0:00-0:30] text [0:31-1:00] text"
            first_time = time_aligned_str.split(']')[0].strip('[')
            last_time = time_aligned_str.split('[')[-1].split(']')[0]
            return f"{first_time}-{last_time}"
        except:
            return "00:00-00:00"

    def extract_audio_segment(self, audio_file: str, time_range: str) -> str:
        """Extract audio segment based on time range and save to file."""
        try:
            # Create extracted_audio directory if it doesn't exist
            os.makedirs("extracted_audio", exist_ok=True)

            # Generate output filename
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            output_path = f"extracted_audio/segment_{timestamp}.wav"

            # Parse time range
            start_time, end_time = time_range.split('-')
            start_seconds = sum(float(x) * 60 ** i for i, x in enumerate(reversed(start_time.split(':'))))
            end_seconds = sum(float(x) * 60 ** i for i, x in enumerate(reversed(end_time.split(':'))))

            # Read audio file
            data, samplerate = sf.read(audio_file)

            # Extract segment
            start_frame = int(start_seconds * samplerate)
            end_frame = int(end_seconds * samplerate)
            segment = data[start_frame:end_frame]

            # Save segment
            sf.write(output_path, segment, samplerate)

            return output_path
        except Exception as e:
            print(f"Error extracting audio segment: {e}")
            return ""

    def answer_question(self, question, k=3):
        """Get answer using RAG system."""
        try:
            # Get relevant documents
            docs = self.vector_store.similarity_search(question, k=k)
            results = []

            for doc in docs:
                try:
                    # Safely parse the content back into dictionary
                    content_str = doc.page_content.replace("'", '"')  # Replace single quotes with double quotes
                    content = eval(content_str)

                    # Extract time range from time-aligned transcripts
                    time_range = self.extract_time_range(content['content']['time_aligned']['english'])

                    # Extract audio segment
                    extracted_audio_path = self.extract_audio_segment(content['source'], time_range)

                    # Calculate simple relevance score based on content overlap
                    question_words = set(question.lower().split())
                    content_words = set(content['content']['english'].lower().split())
                    relevance_score = len(question_words.intersection(content_words)) / len(question_words) if question_words else 0

                    result = {
                        'relevance_score': relevance_score,
                        'audio_file': content['source'],
                        'time_range': time_range,
                        'english_translation': content['content']['english'],
                        'original_kannada': content['content']['kannada'],
                        'extracted_audio_path': extracted_audio_path
                    }
                    results.append(result)

                except Exception as e:
                    print(f"Error processing document: {str(e)}")
                    continue

            # Sort results by relevance score
            results.sort(key=lambda x: x['relevance_score'], reverse=True)
            return results

        except Exception as e:
            print(f"Error getting answer: {str(e)}")
            return []

    def print_results(self, results: List[Dict]):
        """Print results in a formatted way."""
        if not results:
            print("\nNo relevant passages found.")
            return

        print("\nRelevant passages found (ranked by relevance):")
        print("-" * 80)

        for i, result in enumerate(results, 1):
            print(f"\nPassage {i} (Relevance Score: {result['relevance_score']:.3f}):")
            print(f"Audio File: {result['audio_file']}")
            print(f"Time Range: {result['time_range']}")
            print(f"English Translation: {result['english_translation']}")
            print(f"Original Kannada: {result['original_kannada']}")
            print(f"Extracted Audio: {result['extracted_audio_path']}")
            print("-" * 40)

            # Play extracted audio if available
            if result['extracted_audio_path']:
                display(Audio(result['extracted_audio_path']))

    def process_input(self, input_type='text', input_content=None):
        try:
            english_question = None

            if input_type == 'text':
                if not input_content:
                    raise ValueError("No text input provided")
                if self._is_kannada(input_content):
                    english_question = self.translate_to_english(input_content)
                else:
                    english_question = input_content

            elif input_type == 'audio_file':
                if not input_content or not os.path.exists(input_content):
                    raise ValueError("Invalid audio file path")

                audio_filename = os.path.basename(input_content)
                existing_translation = self.get_audio_translation(audio_filename)

                if existing_translation:
                    english_question = existing_translation['content']['english']
                else:
                    kannada_text = self.transcribe_audio(input_content)
                    if not kannada_text:
                        raise ValueError("Failed to transcribe audio")
                    english_question = self.translate_to_english(kannada_text)

            elif input_type == 'speech':
                audio_path = self.record_audio_colab()
                if audio_path:
                    kannada_text = self.transcribe_audio(audio_path)
                    if not kannada_text:
                        raise ValueError("Failed to transcribe speech")
                    english_question = self.translate_to_english(kannada_text)
                    os.remove(audio_path)  # Clean up recording
                else:
                    raise ValueError("No audio recorded")

            if not english_question:
                raise ValueError("Failed to process input")

            results = self.answer_question(english_question)
            if not results:
                print("No relevant information found for your query.")
            return results

        except Exception as e:
            print(f"Error processing input: {str(e)}")
            return []

    def start_qa_session(self):
        """Start an interactive QA session."""
        print("\nWelcome to the Sandalwood News QA System!")
        print("You can:")
        print("1. Type your question (English or Kannada)")
        print("2. Provide an audio file path")
        print("3. Speak your question (using Colab's audio recorder)")
        print("Type 'quit' to exit")

        while True:
            print("\nChoose input method:")
            print("1. Text")
            print("2. Audio File")
            print("3. Speech")
            choice = input("Enter choice (1-3): ")

            if choice.lower() == 'quit':
                break

            try:
                if choice == '1':
                    question = input("\nEnter your question: ")
                    if question.lower() == 'quit':
                        break
                    results = self.process_input('text', question)

                elif choice == '2':
                    audio_path = input("\nEnter audio file path: ")
                    if audio_path.lower() == 'quit':
                        break
                    results = self.process_input('audio_file', audio_path)

                elif choice == '3':
                    print("\nPreparing audio recorder...")
                    results = self.process_input('speech')

                else:
                    print("Invalid choice!")
                    continue

                self.print_results(results)

            except Exception as e:
                print(f"Error: {str(e)}")
                continue

# Example usage
if __name__ == "__main__":
    # Define paths to required files
    csv_path = "/content/allfilescombined.csv"
    kannada_asr_path = '/content/ai4b_indicConformer_kn.nemo'

    try:
        # Initialize the QA system
        print("Initializing Sandalwood QA System...")
        qa_system = SandalwoodQASystem(csv_path, kannada_asr_path)

        # Start interactive session
        print("\nSystem initialized successfully!")
        print("Starting interactive QA session...")
        qa_system.start_qa_session()

    except FileNotFoundError as e:
        print(f"Error: Required file not found - {str(e)}")
        print("Please ensure both the CSV file and Kannada ASR model are in the correct locations.")

    except Exception as e:
        print(f"Error initializing QA system: {str(e)}")
        print("Please check your configuration and try again.")

    finally:
        print("\nThank you for using the Sandalwood QA System!")

Initializing Sandalwood QA System...
[NeMo I 2024-11-17 16:10:43 mixins:198] _setup_tokenizer: detected an aggregate tokenizer
[NeMo I 2024-11-17 16:10:43 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:10:43 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:10:43 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:10:43 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:10:43 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:10:43 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:10:43 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:10:43 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:10:43 mixins:335] Tokenizer SentencePieceTo

[NeMo W 2024-11-17 16:10:51 modelPT:165] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath:
    - /nlsasfs/home/ai4bharat/ai4bharat-pr/speechteam/indicasr_v3/manifests/nemo/vistaar_v3/train/train_kannada.json
    sample_rate: 16000
    batch_size: 8
    shuffle: false
    num_workers: 16
    pin_memory: true
    max_duration: 30.0
    min_duration: 0.2
    is_tarred: false
    tarred_audio_filepaths: null
    shuffle_n: 2048
    bucketing_strategy: synced_randomized
    bucketing_batch_size: null
    is_concat: true
    concat_sampling_technique: temperature
    concat_sampling_temperature: 1.5
    return_language_id: true
    
[NeMo W 2024-11-17 16:10:51 modelPT:172] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configurati

[NeMo I 2024-11-17 16:10:51 features:289] PADDING: 0
[NeMo I 2024-11-17 16:10:54 rnnt:1663] Vocab size for each language: 256
[NeMo I 2024-11-17 16:10:54 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:10:54 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:10:55 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:10:55 hybrid_rnnt_ctc_bpe_models:105] Creating masks for multi-softmax layer.
[NeMo I 2024-11-17 16:10:55 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:10:56 save_restore_connector:263] Model EncDecHybridRNNTCTCBPEModel was successfully restored from /content/ai4b_indicConformer_kn.nemo.

System initial

In [9]:
import pandas as pd
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
import ast
import re

def load_and_prepare_data(csv_path):
    df = pd.read_csv(csv_path)


    df['time_aligned_transcripts'] = df['time_aligned_transcripts'].apply(ast.literal_eval)
    df['time_aligned_translations'] = df['time_aligned_translations'].apply(ast.literal_eval)

    return df

def create_documents(df):
    documents = []

    for idx, row in df.iterrows():

        for time_range, translation in row['time_aligned_translations'].items():

            metadata = {
                'audio_file': row['audio_file'],
                'time_range': time_range,
                'original_text': row['time_aligned_transcripts'].get(time_range, ''),
                'full_translation': row['full_transcript_translation']
            }

            doc = Document(
                page_content=translation,
                metadata=metadata
            )
            documents.append(doc)

    return documents

# Initialize vector store
def initialize_vector_store(documents):
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
    )

    # Create FAISS index
    vector_store = FAISS.from_documents(documents, embeddings)

    return vector_store

def answer_question(question, vector_store, k=3):
    docs = vector_store.similarity_search(question, k=k)

    print("\nRelevant passages found:")
    print("-" * 80)

    for i, doc in enumerate(docs, 1):
        print(f"\nPassage {i}:")
        print(f"Audio File: {doc.metadata['audio_file']}")
        print(f"Time Range: {doc.metadata['time_range']}")
        print(f"English Translation: {doc.page_content}")
        print(f"Original Kannada: {doc.metadata['original_text']}")
        print("-" * 40)

    return docs

def setup_qa_system(csv_path):
    print("Loading data...")
    df = load_and_prepare_data(csv_path)

    print("Creating documents...")
    documents = create_documents(df)

    print("Initializing vector store...")
    vector_store = initialize_vector_store(documents)

    return vector_store

if __name__ == "__main__":
    csv_path = "/content/allfilescombined.csv"
    vector_store = setup_qa_system(csv_path)

    print("\nWelcome to the Q&A System!")
    print("Type 'quit' or 'exit' to end the session")

    while True:
        question = input("\nEnter your question: ").strip()

        if question.lower() in ['quit', 'exit']:
            print("Thank you for using the Q&A system. Goodbye!")
            break

        if not question:
            print("Please enter a valid question.")
            continue

        print(f"\nSearching for answer to: {question}")
        relevant_docs = answer_question(question, vector_store)

Loading data...
Creating documents...
Initializing vector store...


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/4.13k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/723 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/402 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


Welcome to the Q&A System!
Type 'quit' or 'exit' to end the session

Enter your question: What is mentioned about farmers?

Searching for answer to: What is mentioned about farmers?

Relevant passages found:
--------------------------------------------------------------------------------

Passage 1:
Audio File: SandalWoodNewsStories_156.mp3
Time Range: 290.00s - 300.00s
English Translation: ([Farmers are tilling the soil and doing all the ploughing and sowing in the agricultural land.], [Farmers are tilling the soil and doing all the ploughing and sowing in the agricultural land.])
Original Kannada: ([' ಫಾರ್ಮರ್ಸ್ ಆಗ್ರಿಕಲ್ಚರ ಲಂಡ್ ಅಲ್ಲಿ ಫೂಲ ಉಳುಮೆ ಮಾಡ್ಕೊಂಡು ಪ್ಲೋಇಂಗ್ ಎಲ್ಲ ಮಾಡಿ ನೆಟ್ಬೋದು ಆ'], [' ಫಾರ್ಮರ್ಸ್ ಆಗ್ರಿಕಲ್ಚರ ಲಂಡ್ ಅಲ್ಲಿ ಫೂಲ ಉಳುಮೆ ಮಾಡ್ಕೊಂಡು ಪ್ಲೋಇಂಗ್ ಎಲ್ಲ ಮಾಡಿ ನೆಟ್ಬೋದು ಆ'])
----------------------------------------

Passage 2:
Audio File: SandalWoodNewsStories_42.mp3
Time Range: 900.00s - 910.00s
English Translation: ([' Farmers' Own '], [' Farmers' Own '])
Original Kannada: ([' ರೈತಾ್ ಯ

In [10]:
import pandas as pd
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
from pydub import AudioSegment
import ast
import re
import os
from typing import List, Dict, Tuple
import torch.nn.functional as F
import torch

class AudioRAGSystem:
    def __init__(self, csv_path: str, audio_dir: str, output_dir: str = "./extracted_segments"):
        """
        Initialize the RAG system with paths for data and audio files.

        Args:
            csv_path: Path to the CSV file containing transcriptions and translations
            audio_dir: Directory containing the audio files
            output_dir: Directory to save extracted audio segments
        """
        self.csv_path = csv_path
        self.audio_dir = audio_dir
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)

        # Load data and initialize system
        self.df = self.load_and_prepare_data()
        self.documents = self.create_documents()
        self.vector_store = self.initialize_vector_store()

        # Cache for loaded audio files
        self.audio_cache = {}

    def parse_time(self, time_str: str) -> float:
        """
        Parse time string to float, handling various formats.

        Args:
            time_str: Time string (e.g., "0.00s", "0.00s ", " 0.00s", etc.)

        Returns:
            float: Time in seconds
        """
        cleaned = time_str.strip().rstrip('s').strip()
        return float(cleaned)

    def load_and_prepare_data(self) -> pd.DataFrame:
        """Load and prepare the CSV data."""
        df = pd.read_csv(self.csv_path)

        def safe_eval(x):
            try:
                return ast.literal_eval(x) if isinstance(x, str) else x
            except (ValueError, SyntaxError):
                return {}

        df['time_aligned_transcripts'] = df['time_aligned_transcripts'].apply(safe_eval)
        df['time_aligned_translations'] = df['time_aligned_translations'].apply(safe_eval)
        return df

    def create_documents(self) -> List[Document]:
        """Create documents for vector store with merged time segments."""
        documents = []

        for idx, row in self.df.iterrows():
            try:

                time_ranges = sorted(
                    row['time_aligned_translations'].keys(),
                    key=lambda x: self.parse_time(x.split('-')[0])
                )

                for i in range(len(time_ranges)):
                    combined_text = ""
                    combined_original = ""
                    start_time = self.parse_time(time_ranges[i].split('-')[0])

                    j = i
                    while j < len(time_ranges):
                        end_time = self.parse_time(time_ranges[j].split('-')[1])
                        if end_time - start_time > 30:
                            break

                        current_translation = row['time_aligned_translations'].get(time_ranges[j], "")
                        current_transcript = row['time_aligned_transcripts'].get(time_ranges[j], "")

                        if current_translation:
                            combined_text += " " + current_translation
                        if current_transcript:
                            combined_original += " " + current_transcript
                        j += 1

                    if not combined_text.strip() or not combined_original.strip():
                        continue

                    metadata = {
                        'audio_file': row['audio_file'],
                        'start_time': start_time,
                        'end_time': end_time,
                        'time_range': f"{start_time:.2f}s - {end_time:.2f}s",
                        'original_text': combined_original.strip(),
                        'full_translation': row.get('full_transcript_translation', '')
                    }

                    doc = Document(
                        page_content=combined_text.strip(),
                        metadata=metadata
                    )
                    documents.append(doc)
            except Exception as e:
                print(f"Error processing row {idx}: {str(e)}")
                continue

        return documents

    def initialize_vector_store(self) -> FAISS:
        """Initialize the FAISS vector store."""
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
        )
        return FAISS.from_documents(self.documents, embeddings)

    def calculate_relevance_score(self, question: str, doc: Document) -> float:
        """
        Calculate a relevance score for a document relative to the question.
        Uses a combination of semantic similarity and keyword matching.
        """
        try:

            embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
            )
            question_emb = embeddings.embed_query(question)
            doc_emb = embeddings.embed_query(doc.page_content)

            semantic_score = F.cosine_similarity(
                torch.tensor(question_emb).unsqueeze(0),
                torch.tensor(doc_emb).unsqueeze(0)
            ).item()

            question_words = set(question.lower().split())
            doc_words = set(doc.page_content.lower().split())
            keyword_score = len(question_words.intersection(doc_words)) / len(question_words)

            final_score = 0.7 * semantic_score + 0.3 * keyword_score

            return final_score
        except Exception as e:
            print(f"Error calculating relevance score: {str(e)}")
            return 0.0

    def extract_audio_segment(self, audio_file: str, start_time: float, end_time: float) -> str:
        """Extract and save an audio segment."""
        try:

            if audio_file not in self.audio_cache:
                audio_path = os.path.join(self.audio_dir, audio_file)
                self.audio_cache[audio_file] = AudioSegment.from_mp3(audio_path)

            audio = self.audio_cache[audio_file]

            start_ms = int(start_time * 1000)
            end_ms = int(end_time * 1000)

            segment = audio[start_ms:end_ms]

            output_filename = f"segment_{audio_file}_{start_time:.2f}_{end_time:.2f}.mp3"
            output_path = os.path.join(self.output_dir, output_filename)
            segment.export(output_path, format='mp3')

            return output_path
        except Exception as e:
            print(f"Error extracting audio segment: {str(e)}")
            return ""

    def answer_question(self, question: str, k: int = 5) -> List[Dict]:
        """
        Answer a question by retrieving and ranking relevant passages.
        Also extracts corresponding audio segments.
        """
        try:

            docs = self.vector_store.similarity_search(question, k=k)

            scored_docs = [
                (doc, self.calculate_relevance_score(question, doc))
                for doc in docs
            ]
            scored_docs.sort(key=lambda x: x[1], reverse=True)

            results = []
            for doc, score in scored_docs:

                audio_path = self.extract_audio_segment(
                    doc.metadata['audio_file'],
                    doc.metadata['start_time'],
                    doc.metadata['end_time']
                )

                result = {
                    'relevance_score': score,
                    'audio_file': doc.metadata['audio_file'],
                    'time_range': doc.metadata['time_range'],
                    'english_translation': doc.page_content,
                    'original_kannada': doc.metadata['original_text'],
                    'extracted_audio_path': audio_path
                }
                results.append(result)

            return results
        except Exception as e:
            print(f"Error answering question: {str(e)}")
            return []

    def print_results(self, results: List[Dict]):
        """Print results in a formatted way."""
        if not results:
            print("\nNo relevant passages found.")
            return

        print("\nRelevant passages found (ranked by relevance):")
        print("-" * 80)

        for i, result in enumerate(results, 1):
            print(f"\nPassage {i} (Relevance Score: {result['relevance_score']:.3f}):")
            print(f"Audio File: {result['audio_file']}")
            print(f"Time Range: {result['time_range']}")
            print(f"English Translation: {result['english_translation']}")
            print(f"Original Kannada: {result['original_kannada']}")
            print(f"Extracted Audio: {result['extracted_audio_path']}")
            print("-" * 40)

In [12]:

csv_path = "/content/allfilescombined.csv"
audio_dir = "/content/audio-kannada"
output_dir = "/content/extracted_segments2"

rag_system = AudioRAGSystem(csv_path, audio_dir, output_dir)

def interactive_qa():
    while True:
        question = input("\nEnter your question (or 'quit' to exit): ")
        if question.lower() == 'quit':
            break

        print("\nSearching for answer...")
        results = rag_system.answer_question(question)
        rag_system.print_results(results)

if __name__ == "__main__":

    sample_question = "What is mentioned about farmers?"
    results = rag_system.answer_question(sample_question)
    rag_system.print_results(results)
    print("\nEntering interactive mode...")
    interactive_qa()


Relevant passages found (ranked by relevance):
--------------------------------------------------------------------------------

Passage 1 (Relevance Score: 0.661):
Audio File: SandalWoodNewsStories_9.mp3
Time Range: 650.00s - 690.00s
English Translation: ([My contact with farmers is continuous, different organizations come and hundreds of farmers come under one roof, they themselves come and gather there], [My contact with farmers is continuous, different organizations come and hundreds of farmers come under one roof, they themselves come and gather there]) ([' Every third Sunday of the month, we have a group discussion where farmers can discuss any confusions they have about agroforestry, what to do and not to do'], [' Every third Sunday of the month, we have a group discussion where farmers can discuss any confusions they have about agroforestry, what to do and not to do']) ([' And so what I do is every third Sunday of the month at 9 am, I have a session on agro forestry models on 

In [13]:
import torch
import torchaudio
import nemo.collections.asr as nemo_asr
import gc
import os
from typing import Tuple
from pathlib import Path

def setup_model(model_path: str) -> nemo_asr.models.EncDecCTCModel:
    """Initialize the ASR model."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = nemo_asr.models.EncDecCTCModel.restore_from(restore_path=model_path)
    model.freeze()
    model = model.to(device)
    return model

def process_audio(audio_path: str) -> Tuple[torch.Tensor, int]:
    """Load and preprocess audio file."""
    waveform, sample_rate = torchaudio.load(audio_path)
    resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
    resampled_waveform = resampler(waveform)

    # Convert stereo to mono if necessary
    if resampled_waveform.size(0) > 1:
        resampled_waveform = resampled_waveform.mean(dim=0, keepdim=True)

    return resampled_waveform.squeeze(0), 16000

def transcribe_single_audio(model: nemo_asr.models.EncDecCTCModel, audio_path: str) -> str:
    """Transcribe a single audio file and return the transcription."""
    try:
        # Process audio
        input_signal, _ = process_audio(audio_path)
        input_signal = input_signal.to(model.device)

        # Get transcription
        transcript = model.transcribe(
            input_signal,
            batch_size=1,
            logprobs=False,
            language_id='kn'
        )[0]

        # Clean up
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        del input_signal
        gc.collect()

        return transcript

    except Exception as e:
        return f"Error processing audio: {str(e)}"

def main():
    # Configuration
    model_path = '/content/ai4b_indicConformer_kn.nemo'  # Update with your model path

    print("Welcome to the Kannada Audio Transcription System!")

    # Initialize model
    try:
        print("\nLoading the transcription model...")
        model = setup_model(model_path)
        print("Model loaded successfully!")
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        return

    while True:
        # Get audio file path from user
        print("\nEnter the path to your audio file (or 'quit' to exit):")
        audio_path = input().strip()

        if audio_path.lower() == 'quit':
            print("Thank you for using the transcription system. Goodbye!")
            break

        # Verify file exists
        if not os.path.exists(audio_path):
            print("Error: File does not exist. Please check the path and try again.")
            continue

        # Verify file is an audio file
        if not audio_path.lower().endswith(('.mp3', '.wav', '.flac', '.ogg')):
            print("Error: File must be an audio file (mp3, wav, flac, or ogg)")
            continue

        print(f"\nTranscribing {os.path.basename(audio_path)}...")
        transcript = transcribe_single_audio(model, audio_path)

        print("\nTranscription Result:")
        print("-" * 50)
        print(transcript)
        print("-" * 50)

    # Clean up
    del model
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

if __name__ == "__main__":
    main()

Welcome to the Kannada Audio Transcription System!

Loading the transcription model...
[NeMo I 2024-11-17 16:40:02 mixins:198] _setup_tokenizer: detected an aggregate tokenizer
[NeMo I 2024-11-17 16:40:02 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:40:02 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:40:02 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:40:02 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:40:02 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:40:02 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:40:02 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:40:02 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11

[NeMo W 2024-11-17 16:40:10 modelPT:165] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath:
    - /nlsasfs/home/ai4bharat/ai4bharat-pr/speechteam/indicasr_v3/manifests/nemo/vistaar_v3/train/train_kannada.json
    sample_rate: 16000
    batch_size: 8
    shuffle: false
    num_workers: 16
    pin_memory: true
    max_duration: 30.0
    min_duration: 0.2
    is_tarred: false
    tarred_audio_filepaths: null
    shuffle_n: 2048
    bucketing_strategy: synced_randomized
    bucketing_batch_size: null
    is_concat: true
    concat_sampling_technique: temperature
    concat_sampling_temperature: 1.5
    return_language_id: true
    
[NeMo W 2024-11-17 16:40:10 modelPT:172] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configurati

[NeMo I 2024-11-17 16:40:10 features:289] PADDING: 0
[NeMo I 2024-11-17 16:40:13 rnnt:1663] Vocab size for each language: 256
[NeMo I 2024-11-17 16:40:13 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:40:13 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:40:16 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:40:16 hybrid_rnnt_ctc_bpe_models:105] Creating masks for multi-softmax layer.
[NeMo I 2024-11-17 16:40:16 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:40:17 save_restore_connector:263] Model EncDecHybridRNNTCTCBPEModel was successfully restored from /content/ai4b_indicConformer_kn.nemo.
Model loaded su

Transcribing: 100%|██████████| 1/1 [00:01<00:00,  1.69s/it]



Transcription Result:
--------------------------------------------------
[' ರೈತರ ಬಗ್ಗೆ ಏನು ಪ್ರಸ್ತಾಪಿಸಲಾಗಿದೆ']
--------------------------------------------------

Enter the path to your audio file (or 'quit' to exit):
quit
Thank you for using the transcription system. Goodbye!


In [14]:
import torch
import torchaudio
import nemo.collections.asr as nemo_asr
import gc
import os
from typing import Tuple
from pathlib import Path
from langchain_google_genai import ChatGoogleGenerativeAI

def setup_model(model_path: str) -> nemo_asr.models.EncDecCTCModel:
    """Initialize the ASR model."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = nemo_asr.models.EncDecCTCModel.restore_from(restore_path=model_path)
    model.freeze()
    model = model.to(device)
    return model

def process_audio(audio_path: str) -> Tuple[torch.Tensor, int]:
    """Load and preprocess audio file."""
    waveform, sample_rate = torchaudio.load(audio_path)
    resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
    resampled_waveform = resampler(waveform)

    # Convert stereo to mono if necessary
    if resampled_waveform.size(0) > 1:
        resampled_waveform = resampled_waveform.mean(dim=0, keepdim=True)

    return resampled_waveform.squeeze(0), 16000

def translate_text(text: str, llm: ChatGoogleGenerativeAI) -> str:
    """Translate Kannada text to English using Gemini."""
    prompt = f"Translate this Kannada text to English: {text}"
    try:
        response = llm.invoke(prompt)
        return str(response.content)
    except Exception as e:
        return f"Translation error: {str(e)}"

def transcribe_and_translate_audio(model: nemo_asr.models.EncDecCTCModel,
                                 llm: ChatGoogleGenerativeAI,
                                 audio_path: str) -> tuple:
    """Transcribe audio and translate the transcription."""
    try:
        # Process audio
        input_signal, _ = process_audio(audio_path)
        input_signal = input_signal.to(model.device)

        # Get transcription
        transcript = model.transcribe(
            input_signal,
            batch_size=1,
            logprobs=False,
            language_id='kn'
        )[0]

        # Get translation
        translation = translate_text(transcript, llm)

        # Clean up
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        del input_signal
        gc.collect()

        return transcript, translation

    except Exception as e:
        return f"Error processing audio: {str(e)}", "Translation not available due to error"

def main():
    # Configuration
    model_path = '/content/ai4b_indicConformer_kn.nemo'  # Update with your model path
    os.environ["GOOGLE_API_KEY"] = "AIzaSyC29gObkycJDBjVkEWjhJoJO-HVB0pC00E"  # Replace with your key

    print("Welcome to the Kannada Audio Transcription and Translation System!")

    # Initialize models
    try:
        print("\nLoading the transcription model...")
        asr_model = setup_model(model_path)
        print("ASR Model loaded successfully!")

        print("\nInitializing translation model...")
        llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.7)
        print("Translation model initialized!")
    except Exception as e:
        print(f"Error loading models: {str(e)}")
        return

    while True:
        # Get audio file path from user
        print("\nEnter the path to your audio file (or 'quit' to exit):")
        audio_path = input().strip()

        if audio_path.lower() == 'quit':
            print("Thank you for using the system. Goodbye!")
            break

        # Verify file exists
        if not os.path.exists(audio_path):
            print("Error: File does not exist. Please check the path and try again.")
            continue

        # Verify file is an audio file
        if not audio_path.lower().endswith(('.mp3', '.wav', '.flac', '.ogg')):
            print("Error: File must be an audio file (mp3, wav, flac, or ogg)")
            continue

        print(f"\nProcessing {os.path.basename(audio_path)}...")
        transcript, translation = transcribe_and_translate_audio(asr_model, llm, audio_path)

        print("\nResults:")
        print("-" * 50)
        print("Original Kannada Transcription:")
        print(transcript)
        print("\nEnglish Translation:")
        print(translation)
        print("-" * 50)

    # Clean up
    del asr_model
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

if __name__ == "__main__":
    main()

Welcome to the Kannada Audio Transcription and Translation System!

Loading the transcription model...
[NeMo I 2024-11-17 16:46:19 mixins:198] _setup_tokenizer: detected an aggregate tokenizer
[NeMo I 2024-11-17 16:46:19 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:46:19 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:46:19 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:46:19 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:46:19 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:46:19 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:46:19 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:46:19 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens

[NeMo W 2024-11-17 16:46:29 modelPT:165] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath:
    - /nlsasfs/home/ai4bharat/ai4bharat-pr/speechteam/indicasr_v3/manifests/nemo/vistaar_v3/train/train_kannada.json
    sample_rate: 16000
    batch_size: 8
    shuffle: false
    num_workers: 16
    pin_memory: true
    max_duration: 30.0
    min_duration: 0.2
    is_tarred: false
    tarred_audio_filepaths: null
    shuffle_n: 2048
    bucketing_strategy: synced_randomized
    bucketing_batch_size: null
    is_concat: true
    concat_sampling_technique: temperature
    concat_sampling_temperature: 1.5
    return_language_id: true
    
[NeMo W 2024-11-17 16:46:29 modelPT:172] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configurati

[NeMo I 2024-11-17 16:46:29 features:289] PADDING: 0
[NeMo I 2024-11-17 16:46:31 rnnt:1663] Vocab size for each language: 256
[NeMo I 2024-11-17 16:46:32 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:46:32 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:46:34 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:46:34 hybrid_rnnt_ctc_bpe_models:105] Creating masks for multi-softmax layer.
[NeMo I 2024-11-17 16:46:34 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:46:35 save_restore_connector:263] Model EncDecHybridRNNTCTCBPEModel was successfully restored from /content/ai4b_indicConformer_kn.nemo.
ASR Model loade

Transcribing: 100%|██████████| 1/1 [00:00<00:00, 21.49it/s]



Results:
--------------------------------------------------
Original Kannada Transcription:
[' ರೈತರ ಬಗ್ಗೆ ಏನು ಪ್ರಸ್ತಾಪಿಸಲಾಗಿದೆ']

English Translation:
['What is proposed about farmers']
--------------------------------------------------

Enter the path to your audio file (or 'quit' to exit):
quit
Thank you for using the system. Goodbye!


In [15]:
import torch
import torchaudio
import nemo.collections.asr as nemo_asr
import gc
import os
from typing import List, Dict, Tuple
from pathlib import Path
from langchain_google_genai import ChatGoogleGenerativeAI
import pandas as pd
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
from pydub import AudioSegment
import ast
import torch.nn.functional as F

class CombinedAudioSystem:
    def __init__(self, model_path: str, csv_path: str, audio_dir: str, output_dir: str = "./extracted_segments"):
        """Initialize both ASR and RAG systems"""
        # Initialize ASR model
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.asr_model = self.setup_model(model_path)

        # Initialize Gemini
        os.environ["GOOGLE_API_KEY"] = "AIzaSyC29gObkycJDBjVkEWjhJoJO-HVB0pC00E"
        self.llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.7)

        # Initialize RAG system
        self.csv_path = csv_path
        self.audio_dir = audio_dir
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)

        # Load existing data and initialize RAG
        self.df = self.load_and_prepare_data()
        self.documents = self.create_documents()
        self.vector_store = self.initialize_vector_store()
        self.audio_cache = {}

    def setup_model(self, model_path: str) -> nemo_asr.models.EncDecCTCModel:
        """Initialize the ASR model."""
        model = nemo_asr.models.EncDecCTCModel.restore_from(restore_path=model_path)
        model.freeze()
        model = model.to(self.device)
        return model

    def process_audio(self, audio_path: str) -> Tuple[torch.Tensor, int]:
        """Load and preprocess audio file."""
        waveform, sample_rate = torchaudio.load(audio_path)
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        resampled_waveform = resampler(waveform)

        if resampled_waveform.size(0) > 1:
            resampled_waveform = resampled_waveform.mean(dim=0, keepdim=True)

        return resampled_waveform.squeeze(0), 16000

    def translate_text(self, text: str) -> str:
        """Translate Kannada text to English using Gemini."""
        prompt = f"Translate this Kannada text to English: {text}"
        try:
            response = self.llm.invoke(prompt)
            return str(response.content)
        except Exception as e:
            return f"Translation error: {str(e)}"

    def transcribe_and_translate_audio(self, audio_path: str) -> tuple:
        """Transcribe audio and translate the transcription."""
        try:
            input_signal, _ = self.process_audio(audio_path)
            input_signal = input_signal.to(self.device)

            transcript = self.asr_model.transcribe(
                input_signal,
                batch_size=1,
                logprobs=False,
                language_id='kn'
            )[0]

            translation = self.translate_text(transcript)

            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            del input_signal
            gc.collect()

            return transcript, translation

        except Exception as e:
            return f"Error processing audio: {str(e)}", "Translation not available due to error"

    def load_and_prepare_data(self) -> pd.DataFrame:
        """Load and prepare the CSV data."""
        df = pd.read_csv(self.csv_path)
        df['time_aligned_transcripts'] = df['time_aligned_transcripts'].apply(
            lambda x: ast.literal_eval(x) if isinstance(x, str) else x
        )
        df['time_aligned_translations'] = df['time_aligned_translations'].apply(
            lambda x: ast.literal_eval(x) if isinstance(x, str) else x
        )
        return df

    def create_documents(self) -> List[Document]:
        """Create documents for vector store."""
        documents = []
        for idx, row in self.df.iterrows():
            for time_range, translation in row['time_aligned_translations'].items():
                metadata = {
                    'audio_file': row['audio_file'],
                    'time_range': time_range,
                    'original_text': row['time_aligned_transcripts'].get(time_range, ''),
                    'full_translation': row['full_transcript_translation']
                }
                doc = Document(
                    page_content=translation,
                    metadata=metadata
                )
                documents.append(doc)
        return documents

    def initialize_vector_store(self) -> FAISS:
        """Initialize the FAISS vector store."""
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
        )
        return FAISS.from_documents(self.documents, embeddings)

    def calculate_relevance_score(self, question: str, doc: Document) -> float:
        """Calculate relevance score for document."""
        try:
            embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
            )
            question_emb = embeddings.embed_query(question)
            doc_emb = embeddings.embed_query(doc.page_content)

            semantic_score = F.cosine_similarity(
                torch.tensor(question_emb).unsqueeze(0),
                torch.tensor(doc_emb).unsqueeze(0)
            ).item()

            return semantic_score
        except Exception as e:
            print(f"Error calculating relevance score: {str(e)}")
            return 0.0

    def answer_question(self, question: str, k: int = 3) -> List[Dict]:
        """Search for relevant passages and return results."""
        docs = self.vector_store.similarity_search(question, k=k)
        results = []

        for doc in docs:
            score = self.calculate_relevance_score(question, doc)
            result = {
                'relevance_score': score,
                'audio_file': doc.metadata['audio_file'],
                'time_range': doc.metadata['time_range'],
                'english_translation': doc.page_content,
                'original_kannada': doc.metadata['original_text']
            }
            results.append(result)

        return results

    def print_results(self, results: List[Dict]):
        """Print results in a formatted way."""
        if not results:
            print("\nNo relevant passages found.")
            return

        print("\nRelevant passages found (ranked by relevance):")
        print("-" * 80)

        for i, result in enumerate(results, 1):
            print(f"\nPassage {i} (Relevance Score: {result['relevance_score']:.3f}):")
            print(f"Audio File: {result['audio_file']}")
            print(f"Time Range: {result['time_range']}")
            print(f"English Translation: {result['english_translation']}")
            print(f"Original Kannada: {result['original_kannada']}")
            print("-" * 40)

def main():
    # Configuration
    model_path = '/content/ai4b_indicConformer_kn.nemo'
    csv_path = "/content/allfilescombined.csv"
    audio_dir = "/content/audio-kannada"
    output_dir = "/content/extracted_segments3"

    print("Welcome to the Combined Audio Processing System!")

    try:
        print("\nInitializing system...")
        system = CombinedAudioSystem(model_path, csv_path, audio_dir, output_dir)
        print("System initialized successfully!")
    except Exception as e:
        print(f"Error initializing system: {str(e)}")
        return

    while True:
        print("\nChoose an option:")
        print("1. Transcribe and translate new audio")
        print("2. Search existing transcriptions")
        print("3. Quit")

        choice = input("\nEnter your choice (1-3): ").strip()

        if choice == "1":
            print("\nEnter the path to your audio file:")
            audio_path = input().strip()

            if not os.path.exists(audio_path):
                print("Error: File does not exist.")
                continue

            if not audio_path.lower().endswith(('.mp3', '.wav', '.flac', '.ogg')):
                print("Error: File must be an audio file (mp3, wav, flac, or ogg)")
                continue

            print(f"\nProcessing {os.path.basename(audio_path)}...")
            transcript, translation = system.transcribe_and_translate_audio(audio_path)

            print("\nResults:")
            print("-" * 50)
            print("Original Kannada Transcription:")
            print(transcript)
            print("\nEnglish Translation:")
            print(translation)
            print("-" * 50)

        elif choice == "2":
            question = input("\nEnter your search query: ")
            results = system.answer_question(question)
            system.print_results(results)

        elif choice == "3":
            print("Thank you for using the system. Goodbye!")
            break

        else:
            print("Invalid choice. Please try again.")

if __name__ == "__main__":
    main()

Welcome to the Combined Audio Processing System!

Initializing system...
[NeMo I 2024-11-17 16:50:50 mixins:198] _setup_tokenizer: detected an aggregate tokenizer
[NeMo I 2024-11-17 16:50:50 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:50:50 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:50:50 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:50:50 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:50:50 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:50:50 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:50:50 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:50:50 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 16:50:50 m

[NeMo W 2024-11-17 16:51:01 modelPT:165] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath:
    - /nlsasfs/home/ai4bharat/ai4bharat-pr/speechteam/indicasr_v3/manifests/nemo/vistaar_v3/train/train_kannada.json
    sample_rate: 16000
    batch_size: 8
    shuffle: false
    num_workers: 16
    pin_memory: true
    max_duration: 30.0
    min_duration: 0.2
    is_tarred: false
    tarred_audio_filepaths: null
    shuffle_n: 2048
    bucketing_strategy: synced_randomized
    bucketing_batch_size: null
    is_concat: true
    concat_sampling_technique: temperature
    concat_sampling_temperature: 1.5
    return_language_id: true
    
[NeMo W 2024-11-17 16:51:01 modelPT:172] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configurati

[NeMo I 2024-11-17 16:51:01 features:289] PADDING: 0
[NeMo I 2024-11-17 16:51:03 rnnt:1663] Vocab size for each language: 256
[NeMo I 2024-11-17 16:51:03 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:51:03 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:51:05 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:51:05 hybrid_rnnt_ctc_bpe_models:105] Creating masks for multi-softmax layer.
[NeMo I 2024-11-17 16:51:05 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 16:51:05 save_restore_connector:263] Model EncDecHybridRNNTCTCBPEModel was successfully restored from /content/ai4b_indicConformer_kn.nemo.
System initiali

Transcribing: 100%|██████████| 1/1 [00:00<00:00, 22.82it/s]



Results:
--------------------------------------------------
Original Kannada Transcription:
[' ರೈತರ ಬಗ್ಗೆ ಏನು ಪ್ರಸ್ತಾಪಿಸಲಾಗಿದೆ']

English Translation:
[' What is proposed about farmers']
--------------------------------------------------

Choose an option:
1. Transcribe and translate new audio
2. Search existing transcriptions
3. Quit

Enter your choice (1-3): 2

Enter your search query: what is proposed about farmers

Relevant passages found (ranked by relevance):
--------------------------------------------------------------------------------

Passage 1 (Relevance Score: 0.690):
Audio File: SandalWoodNewsStories_174.mp3
Time Range: 570.00s - 580.00s
English Translation: [' The cropping system should provide income to the farmer in every season. Also, after forty-five to fifty years, it is not possible to work on the farm because by then, one can earn one lakh rupees by sitting in the room'], [' The cropping system should provide income to the farmer in every season. Also, after fort

In [19]:
import torch
import torchaudio
import nemo.collections.asr as nemo_asr
import gc
import os
from typing import List, Dict, Tuple
from pathlib import Path
from langchain_google_genai import ChatGoogleGenerativeAI
import pandas as pd
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
from pydub import AudioSegment
import ast
import torch.nn.functional as F
import re

class CombinedAudioSystem:
    def __init__(self, model_path: str, csv_path: str, audio_dir: str, output_dir: str = "./extracted_segments"):
        """Initialize both ASR and RAG systems"""
        # Initialize ASR model
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.asr_model = self.setup_model(model_path)

        # Initialize Gemini
        os.environ["GOOGLE_API_KEY"] = "AIzaSyC29gObkycJDBjVkEWjhJoJO-HVB0pC00E"
        self.llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.7)

        # Initialize RAG system
        self.csv_path = csv_path
        self.audio_dir = audio_dir
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)

        # Load existing data and initialize RAG
        self.df = self.load_and_prepare_data()
        self.documents = self.create_documents()
        self.vector_store = self.initialize_vector_store()
        self.audio_cache = {}

    def setup_model(self, model_path: str) -> nemo_asr.models.EncDecCTCModel:
        """Initialize the ASR model."""
        model = nemo_asr.models.EncDecCTCModel.restore_from(restore_path=model_path)
        model.freeze()
        model = model.to(self.device)
        return model

    def process_audio(self, audio_path: str) -> Tuple[torch.Tensor, int]:
        """Load and preprocess audio file."""
        waveform, sample_rate = torchaudio.load(audio_path)
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        resampled_waveform = resampler(waveform)

        if resampled_waveform.size(0) > 1:
            resampled_waveform = resampled_waveform.mean(dim=0, keepdim=True)

        return resampled_waveform.squeeze(0), 16000

    def translate_text(self, text: str) -> str:
        """Translate Kannada text to English and extract questions."""
        prompt = f"Translate this Kannada text to English and identify any questions in the text. Format: TRANSLATION: <translation>\nQUESTIONS: <list of questions>: {text}"
        try:
            response = self.llm.invoke(prompt)
            response_text = str(response.content)

            # Parse the response to separate translation and questions
            translation_match = re.search(r"TRANSLATION:(.*?)(?=QUESTIONS:|$)", response_text, re.DOTALL)
            questions_match = re.search(r"QUESTIONS:(.*?)$", response_text, re.DOTALL)

            translation = translation_match.group(1).strip() if translation_match else response_text
            questions = questions_match.group(1).strip() if questions_match else ""

            return translation, questions.split('\n') if questions else []
        except Exception as e:
            return f"Translation error: {str(e)}", []

    def transcribe_and_translate_audio(self, audio_path: str) -> tuple:
        """Transcribe audio, translate the transcription, and extract questions."""
        try:
            input_signal, _ = self.process_audio(audio_path)
            input_signal = input_signal.to(self.device)

            transcript = self.asr_model.transcribe(
                input_signal,
                batch_size=1,
                logprobs=False,
                language_id='kn'
            )[0]

            translation, questions = self.translate_text(transcript)

            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            del input_signal
            gc.collect()

            return transcript, translation, questions

        except Exception as e:
            return f"Error processing audio: {str(e)}", "Translation not available due to error", []

# [Previous imports and class definition remain the same until extract_audio_segment method]

    def extract_audio_segment(self, audio_file: str, time_range: str) -> str:
        """Extract audio segment based on time range."""
        try:
            # Clean and parse time range string
            # Remove 's' suffix and whitespace, then split on hyphen
            clean_range = time_range.replace('s', '').strip()
            if '-' in clean_range:
                start_str, end_str = clean_range.split('-')
            else:
                # Handle single timestamp format (assuming it's the end time with 0 as start)
                start_str, end_str = '0', clean_range

            # Convert to float, handling any remaining whitespace
            start_time = float(start_str.strip())
            end_time = float(end_str.strip())

            # Create output filename
            audio_name = os.path.splitext(os.path.basename(audio_file))[0]
            output_filename = f"{audio_name}_{start_time:.2f}-{end_time:.2f}.wav"
            output_path = os.path.join(self.output_dir, output_filename)

            # Check if segment already exists
            if os.path.exists(output_path):
                return output_path

            # Load audio file
            audio_path = os.path.join(self.audio_dir, audio_file)
            audio = AudioSegment.from_file(audio_path)

            # Extract segment (convert to milliseconds)
            start_ms = int(start_time * 1000)
            end_ms = int(end_time * 1000)
            segment = audio[start_ms:end_ms]

            # Export segment
            segment.export(output_path, format="wav")
            return output_path

        except Exception as e:
            print(f"Error extracting audio segment: {str(e)}")
            return ""

    def load_and_prepare_data(self) -> pd.DataFrame:
        """Load and prepare the CSV data."""
        df = pd.read_csv(self.csv_path)
        df['time_aligned_transcripts'] = df['time_aligned_transcripts'].apply(
            lambda x: ast.literal_eval(x) if isinstance(x, str) else x
        )
        df['time_aligned_translations'] = df['time_aligned_translations'].apply(
            lambda x: ast.literal_eval(x) if isinstance(x, str) else x
        )
        return df

    def create_documents(self) -> List[Document]:
        """Create documents for vector store."""
        documents = []
        for idx, row in self.df.iterrows():
            for time_range, translation in row['time_aligned_translations'].items():
                metadata = {
                    'audio_file': row['audio_file'],
                    'time_range': time_range,
                    'original_text': row['time_aligned_transcripts'].get(time_range, ''),
                    'full_translation': row['full_transcript_translation']
                }
                doc = Document(
                    page_content=translation,
                    metadata=metadata
                )
                documents.append(doc)
        return documents

    def initialize_vector_store(self) -> FAISS:
        """Initialize the FAISS vector store."""
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
        )
        return FAISS.from_documents(self.documents, embeddings)

    def calculate_relevance_score(self, question: str, doc: Document) -> float:
        """Calculate relevance score for document."""
        try:
            embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
            )
            question_emb = embeddings.embed_query(question)
            doc_emb = embeddings.embed_query(doc.page_content)

            semantic_score = F.cosine_similarity(
                torch.tensor(question_emb).unsqueeze(0),
                torch.tensor(doc_emb).unsqueeze(0)
            ).item()

            return semantic_score
        except Exception as e:
            print(f"Error calculating relevance score: {str(e)}")
            return 0.0

    def answer_question(self, question: str, k: int = 3) -> List[Dict]:
        """Search for relevant passages and return results with extracted audio segments."""
        docs = self.vector_store.similarity_search(question, k=k)
        results = []

        for doc in docs:
            score = self.calculate_relevance_score(question, doc)

            # Extract audio segment
            extracted_audio_path = self.extract_audio_segment(
                doc.metadata['audio_file'],
                doc.metadata['time_range']
            )

            result = {
                'relevance_score': score,
                'audio_file': doc.metadata['audio_file'],
                'time_range': doc.metadata['time_range'],
                'english_translation': doc.page_content,
                'original_kannada': doc.metadata['original_text'],
                'extracted_audio_path': extracted_audio_path
            }
            results.append(result)

        return sorted(results, key=lambda x: x['relevance_score'], reverse=True)

    def print_results(self, results: List[Dict]):
        """Print results in a formatted way."""
        if not results:
            print("\nNo relevant passages found.")
            return

        print("\nRelevant passages found (ranked by relevance):")
        print("-" * 80)

        for i, result in enumerate(results, 1):
            print(f"\nPassage {i} (Relevance Score: {result['relevance_score']:.3f}):")
            print(f"Audio File: {result['audio_file']}")
            print(f"Time Range: {result['time_range']}")
            print(f"English Translation: {result['english_translation']}")
            print(f"Original Kannada: {result['original_kannada']}")
            print(f"Extracted Audio Segment: {result['extracted_audio_path']}")
            print("-" * 40)

def main():
    # Configuration
    model_path = '/content/ai4b_indicConformer_kn.nemo'
    csv_path = "/content/allfilescombined.csv"
    audio_dir = "/content/audio-kannada"
    output_dir = "/content/extracted_segments3"

    print("Welcome to the Combined Audio Processing System!")

    try:
        print("\nInitializing system...")
        system = CombinedAudioSystem(model_path, csv_path, audio_dir, output_dir)
        print("System initialized successfully!")
    except Exception as e:
        print(f"Error initializing system: {str(e)}")
        return

    while True:
        print("\nChoose an option:")
        print("1. Process audio and answer questions from it")
        print("2. Search existing transcriptions")
        print("3. Quit")

        choice = input("\nEnter your choice (1-3): ").strip()

        if choice == "1":
            print("\nEnter the path to your audio file:")
            audio_path = input().strip()

            if not os.path.exists(audio_path):
                print("Error: File does not exist.")
                continue

            if not audio_path.lower().endswith(('.mp3', '.wav', '.flac', '.ogg')):
                print("Error: File must be an audio file (mp3, wav, flac, or ogg)")
                continue

            print(f"\nProcessing {os.path.basename(audio_path)}...")
            transcript, translation, questions = system.transcribe_and_translate_audio(audio_path)

            print("\nResults:")
            print("-" * 50)
            print("Original Kannada Transcription:")
            print(transcript)
            print("\nEnglish Translation:")
            print(translation)

            if questions:
                print("\nQuestions detected in the audio:")
                for i, question in enumerate(questions, 1):
                    print(f"\nQuestion {i}: {question}")
                    results = system.answer_question(question)
                    system.print_results(results)
            else:
                print("\nNo questions detected in the audio content.")

            print("-" * 50)

        elif choice == "2":
            question = input("\nEnter your search query: ")
            results = system.answer_question(question)
            system.print_results(results)

        elif choice == "3":
            print("Thank you for using the system. Goodbye!")
            break

        else:
            print("Invalid choice. Please try again.")

if __name__ == "__main__":
    main()

Welcome to the Combined Audio Processing System!

Initializing system...
[NeMo I 2024-11-17 17:11:16 mixins:198] _setup_tokenizer: detected an aggregate tokenizer
[NeMo I 2024-11-17 17:11:16 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 17:11:16 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 17:11:16 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 17:11:16 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 17:11:16 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 17:11:16 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 17:11:16 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 17:11:16 mixins:335] Tokenizer SentencePieceTokenizer initialized with 256 tokens
[NeMo I 2024-11-17 17:11:16 m

[NeMo W 2024-11-17 17:11:26 modelPT:165] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath:
    - /nlsasfs/home/ai4bharat/ai4bharat-pr/speechteam/indicasr_v3/manifests/nemo/vistaar_v3/train/train_kannada.json
    sample_rate: 16000
    batch_size: 8
    shuffle: false
    num_workers: 16
    pin_memory: true
    max_duration: 30.0
    min_duration: 0.2
    is_tarred: false
    tarred_audio_filepaths: null
    shuffle_n: 2048
    bucketing_strategy: synced_randomized
    bucketing_batch_size: null
    is_concat: true
    concat_sampling_technique: temperature
    concat_sampling_temperature: 1.5
    return_language_id: true
    
[NeMo W 2024-11-17 17:11:26 modelPT:172] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configurati

[NeMo I 2024-11-17 17:11:26 features:289] PADDING: 0
[NeMo I 2024-11-17 17:11:29 rnnt:1663] Vocab size for each language: 256
[NeMo I 2024-11-17 17:11:30 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 17:11:30 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 17:11:32 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 17:11:32 hybrid_rnnt_ctc_bpe_models:105] Creating masks for multi-softmax layer.
[NeMo I 2024-11-17 17:11:32 rnnt_models:220] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
[NeMo I 2024-11-17 17:11:33 save_restore_connector:263] Model EncDecHybridRNNTCTCBPEModel was successfully restored from /content/ai4b_indicConformer_kn.nemo.
System initiali

Transcribing: 100%|██████████| 1/1 [00:00<00:00, 18.41it/s]



Results:
--------------------------------------------------
Original Kannada Transcription:
[' ರೈತರ ಬಗ್ಗೆ ಏನು ಪ್ರಸ್ತಾಪಿಸಲಾಗಿದೆ']

English Translation:
The farmers in the state have been constantly demanding for a separate budget.

Questions detected in the audio:

Question 1: ['What is proposed about the farmers']

Relevant passages found (ranked by relevance):
--------------------------------------------------------------------------------

Passage 1 (Relevance Score: 0.732):
Audio File: SandalWoodNewsStories_286.mp3
Time Range: 200.00s - 210.00s
English Translation: ([ ' Can be stopped and can be facilitated for the next younger generation, as my personal opinion I am requesting all the farmers'], [' Can be stopped and can be facilitated for the next younger generation, as my personal opinion I am requesting all the farmers'])
Original Kannada: ([' ತಡೆ ಹಿಡಿಬಹದು ತಡೆ ಹಿಡಿದು ಮುಂದಿನ ಯುವ ಪೀಳಿಗೆಗೆ ಅನುಕೂಲ ಮಾಡಿ ಕೊಡಬಹುದು ಅಂತ ನನ್ನ ಒಂದು ವೈಯಕ್ತಿಕ ವಿಚಾರವಾಗಿ ಎಲ್ಲ ರೈತರಲ್ಲಿ ನಾನು ಕಳ್ಕಳಿ ಮನವಿ ಮಾಡ್ಕೊಂ