In [None]:
!pip install torch==2.0.1
!pip install transformers
!pip install bitsandbytes
!pip install peft
!pip install accelerate
!pip install datasets
!pip install trl
!pip install einops
!pip install scipy
!pip install --upgrade openvino-nightly

In [None]:
import os

from huggingface_hub import login

# Never hardcode the access token in the notebook. Read it from the HF_TOKEN
# environment variable; when it is unset, `token=None` makes `login` prompt
# for the token interactively instead.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
import time
from datasets import load_dataset
from trl import SFTTrainer
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments)

# CSV holding the fine-tuning data; later cells read its "questions" and
# "answers" columns.
file_path = "questionsv4.csv"

# Pass the path as a plain string. The original wrapped it in a set literal
# (`{file_path}`), which is not one of the documented `data_files` types.
# The loaded rows are accessed below as the "train" split.
dataset = load_dataset("csv", data_files=file_path)

# Base checkpoint that gets fine-tuned.
model_name = "google/gemma-2b"

In [None]:
# Base causal-LM weights for the checkpoint named in `model_name`.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
)

# Matching tokenizer; the end-of-sequence token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Instruction / Input / Response template. The three `{}` slots are filled, in
# order, with the question, a blank input and the answer.
prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


def formatting_prompts_func(examples, eos_token=None):
    """Render one batch of question/answer rows into training prompts.

    Intended for ``dataset.map(..., batched=True)``.

    Args:
        examples: Mapping with equally long ``"questions"`` and ``"answers"``
            sequences (one batch of rows).
        eos_token: String appended to every rendered prompt. Defaults to the
            global ``tokenizer.eos_token``. Without a terminating EOS token
            the fine-tuned model would not learn where a response ends.

    Returns:
        ``{"text": [...]}`` with one rendered prompt per input row.
    """
    if eos_token is None:
        eos_token = tokenizer.eos_token

    instructions = examples["questions"]
    outputs = examples["answers"]

    # The data has no separate "input" field, so the slot is filled with a
    # single space. The filler is used directly instead of materialising a
    # list sized from the *global* dataset on every batch, which re-read the
    # whole column each call and tied the function to hidden notebook state.
    blank_input = " "

    return {
        "text": [
            prompt.format(instruction, blank_input, output) + eos_token
            for instruction, output in zip(instructions, outputs)
        ]
    }

from datasets import load_dataset
# Add the rendered "text" column by running the formatter over the data in
# batches.
dataset = dataset.map(
    formatting_prompts_func,
    batched=True,
)

In [None]:
# Trainer configuration: results are written under ./results, CUDA is
# disabled, and bf16 precision plus the IPEX integration are switched on.
# NOTE(review): `use_ipex=True` presumably needs Intel Extension for PyTorch,
# which the install cell does not list — confirm it is available.
training_arguments = TrainingArguments(
    output_dir="./results",
    no_cuda=True,
    use_ipex=True,
    bf16=True,
    fp16_full_eval=False,
)

In [None]:
# Supervised fine-tuning on the rendered "text" column of the train split.
# `packing=True` together with `max_seq_length=512` is passed straight to
# SFTTrainer; see the TRL docs for the exact packing semantics.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    train_dataset=dataset["train"],
    dataset_text_field="text",
    max_seq_length=512,
    packing=True,
)

# Left as the cell's last expression so the training summary is displayed.
trainer.train()

In [None]:
# Local directory that receives the fine-tuned checkpoint.
new_model = "finetuned-gemma"
trainer.model.save_pretrained(new_model)
# Store the tokenizer next to the weights so the directory is self-contained
# and can be reloaded with `AutoTokenizer.from_pretrained(new_model)`.
tokenizer.save_pretrained(new_model)

In [None]:
import os
import transformers
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
from transformers import AutoModelForSeq2SeqLM, pipeline
from huggingface_hub import login
import numpy as np

# Checkpoint id used for inference.
new_model = "tensorgirl/finetuned-gemma"

# Tokenizer first (EOS reused as the padding token), then the model weights.
tokenizer = AutoTokenizer.from_pretrained(new_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(new_model, trust_remote_code=True)

# Text-generation pipeline wrapping the objects loaded above.
# NOTE(review): `model` is handed over already instantiated — confirm that
# `torch_dtype` and `device_map` still take effect in that case.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# The translation model and tokenizer get their own names. Rebinding the
# notebook-wide `model` / `tokenizer` here made every later reader of those
# globals (e.g. `tokenizer.eos_token_id` in `predict`) silently pick up the
# translation objects, and made `translate` depend on whatever a later cell
# assigns to them.
translation_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
translation_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")

# Pipeline device index: first CUDA device when available, otherwise -1 (CPU).
device = 0 if torch.cuda.is_available() else -1

# Translation pipelines already built, keyed by (src_lang, tgt_lang), so a
# language pair is only set up once instead of on every call.
_translation_pipelines = {}


def translate(text, src_lang, tgt_lang):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang``.

    Args:
        text: Text to translate.
        src_lang: Source language code as passed to the translation pipeline
            (callers in this notebook use codes such as ``"eng_Latn"``).
        tgt_lang: Target language code in the same format.

    Returns:
        The ``translation_text`` of the first pipeline result.
    """
    pair = (src_lang, tgt_lang)
    if pair not in _translation_pipelines:
        _translation_pipelines[pair] = pipeline(
            "translation",
            model=translation_model,
            tokenizer=translation_tokenizer,
            src_lang=src_lang,
            tgt_lang=tgt_lang,
            max_length=400,
            device=device,
        )

    result = _translation_pipelines[pair](text)
    return result[0]['translation_text']

# Speech-recognition pipelines already loaded, keyed by model id. Loading a
# pipeline is expensive, so each one is created on first use and then kept;
# the trade-off is that every language used stays resident in memory.
_asr_pipelines = {}


def _transcribe(audio, model_id):
    """Transcribe ``audio`` with the speech-recognition model ``model_id``.

    Args:
        audio: ``(sampling_rate, samples)`` pair; ``samples`` is a NumPy array.
        model_id: Checkpoint id handed to the ASR pipeline.

    Returns:
        The ``"text"`` field of the pipeline output.
    """
    if model_id not in _asr_pipelines:
        _asr_pipelines[model_id] = pipeline("automatic-speech-recognition", model=model_id)

    sr, y = audio
    y = y.astype(np.float32)

    # Peak-normalise, but leave an all-zero (silent) clip untouched: dividing
    # by a zero peak would fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return _asr_pipelines[model_id]({"sampling_rate": sr, "raw": y})["text"]


def English(audio):
    """Transcribe English speech; returns the recognised English text."""
    return _transcribe(audio, "openai/whisper-base.en")


def Hindi(audio):
    """Transcribe Hindi speech and return its English translation."""
    text = _transcribe(audio, "theainerd/Wav2Vec2-large-xlsr-hindi")
    return translate(text, "hin_Deva", "eng_Latn")


def Telegu(audio):
    """Transcribe Telugu speech and return its English translation."""
    text = _transcribe(audio, "anuragshas/wav2vec2-large-xlsr-53-telugu")
    return translate(text, "tel_Telu", "eng_Latn")


def Tamil(audio):
    """Transcribe Tamil speech and return its English translation."""
    text = _transcribe(audio, "Harveenchadha/vakyansh-wav2vec2-tamil-tam-250")
    return translate(text, "tam_Taml", "eng_Latn")


def Kannada(audio):
    """Transcribe Kannada speech and return its English translation."""
    text = _transcribe(audio, "vasista22/whisper-kannada-medium")
    return translate(text, "kan_Knda", "eng_Latn")

# Supported UI languages mapped to the language code used when translating the
# answer back. English needs no translation, hence None.
_ANSWER_LANGUAGE_CODES = {
    "English": None,
    "Hindi": "hin_Deva",
    "Telegu": "tel_Telu",
    "Tamil": "tam_Taml",
    "Kannada": "kan_Knda",
}


def predict(audio, language):
    """Answer a spoken question in the speaker's language.

    The audio is transcribed (and, for non-English input, translated to
    English) by the per-language helper, fed to the text generator, and the
    generated text is translated back into ``language``.

    Args:
        audio: ``(sampling_rate, samples)`` pair forwarded to the transcriber.
        language: One of "English", "Hindi", "Telegu", "Tamil", "Kannada".

    Returns:
        The generated answer as a string, in ``language``.

    Raises:
        ValueError: If ``language`` is not supported. Previously this surfaced
            as an ``UnboundLocalError`` on ``message``.
    """
    if language not in _ANSWER_LANGUAGE_CODES:
        supported = ", ".join(_ANSWER_LANGUAGE_CODES)
        raise ValueError(f"Unsupported language {language!r}; expected one of: {supported}")

    transcribers = {
        "English": English,
        "Hindi": Hindi,
        "Telegu": Telegu,
        "Tamil": Tamil,
        "Kannada": Kannada,
    }
    message = transcribers[language](audio)
    print(message)

    sequences = generator(
        message,
        max_length=200,
        # Greedy decoding. The former `top_k=10` is dropped because it has no
        # effect when sampling is disabled.
        do_sample=False,
        num_return_sequences=1,
        # Take the EOS id from the generator's own tokenizer; the global
        # `tokenizer` name is rebound to other models elsewhere in the notebook.
        eos_token_id=generator.tokenizer.eos_token_id,
    )

    answer = "".join(seq['generated_text'] + " " for seq in sequences)
    print(answer)

    target_code = _ANSWER_LANGUAGE_CODES[language]
    if target_code is None:
        return answer
    return translate(answer, "eng_Latn", target_code)

In [None]:
from transformers import AutoTokenizer, pipeline
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

model_id = "tensorgirl/finetuned-gemma"

# 4-bit weight quantization settings (group size 128, ratio 0.8).
q_config = OVWeightQuantizationConfig(bits=4, group_size=128, ratio=0.8)

# OpenVINO runtime settings: latency-oriented hint, compiled-model cache
# directory "model_cache", f32 inference precision.
ov_config = {"PERFORMANCE_HINT": "LATENCY", "CACHE_DIR": "model_cache", "INFERENCE_PRECISION_HINT": "f32"}

# NOTE: this rebinds the notebook-wide `tokenizer` and `model` names.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = OVModelForCausalLM.from_pretrained(
    model_id,
    export=True,
    quantization_config=q_config,
    # OpenVINO expects a device *name*. The global `device` set in the
    # translation cell is the integer 0 / -1 used by transformers pipelines
    # and is not valid here.
    device="CPU",
    ov_config=ov_config,
)