In [None]:
# Install auto-gptq, using the extra wheel index for CUDA 11.8 ("cu118") builds.
!pip3 install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/

In [None]:
!pip install --upgrade setuptools wheel
!pip install pyTelegramBotAPI
!pip install openai langchain faiss-cpu tiktoken
!pip install schedule
!pip install config
!pip install python-dotenv
!pip install gTTs
!pip install celery
!pip -q install langchain openai tiktoken PyPDF2 faiss-cpu
!pip install llama-cpp-python

!pip install ipykernel
!python -m ipykernel install
!conda install notebook ipykernel
!ipython kernelspec install-self

In [None]:
!pip install unstructured
!pip uninstall pdfminer
!pip install opencv-python
!pip install pdfminer.six
!pip install langchain openai chromadb tiktoken pypdf unstructured pdf2image
!pip install unstructured.pytesseract
!pip install unstructured_inference
!pip install chromadb
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install torch==1.11.0+cu113 torchvision==0.11.2+cu113 torchaudio===0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
!pip install sentence-transformers
!pip install accelerate
!pip install optimum
!pip install auto-gptq
!pip install pikepdf
!pip install pillow-heif
!pip install -U langchain-community
!sudo apt-get install -y poppler-utils
!pip install --upgrade pdf2image

In [None]:
import argparse
import json
import logging
import os
import queue
import random
import sqlite3
import sys
import threading
import time
from ast import literal_eval
from pathlib import Path

from llama_cpp import Llama
import telebot
from telebot import types
from huggingface_hub import hf_hub_download

from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.document_loaders.pdf import UnstructuredPDFLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
import torch
from langchain import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
from pdf2image import convert_from_path

In [None]:
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [None]:
# Preparing your documents: load the PDF, split it into chunks, embed the chunks, and index them in Chroma.

In [None]:
# Load the source PDF into LangChain documents.
# NOTE(review): the path looks like a placeholder — point it at a real PDF before running.
loader = UnstructuredPDFLoader("/content/sample_data/your_data_here_pdf")
docs = loader.load()

In [None]:
# Split the loaded documents into overlapping chunks (chunk_size=1024, chunk_overlap=64).
txt_splitter = RecursiveCharacterTextSplitter(chunk_size = 1024, chunk_overlap = 64)
texts = txt_splitter.split_documents(docs)
# Cell output: number of chunks produced.
len(texts)

In [None]:
# Embedding model used both for indexing the chunks and for querying the index.
# The device is hard-coded to "cuda", so this cell needs a GPU runtime.
embeddings = HuggingFaceEmbeddings(
    model_name = "thenlper/gte-large",
    model_kwargs = {"device": "cuda"},
    encode_kwargs = {"normalize_embeddings": True},
)

In [None]:
# Sanity check: embed the first chunk and print the length of the returned vector.
query_result = embeddings.embed_query(texts[0].page_content)
print(len(query_result))

In [None]:
# Build the Chroma vector store from the chunks, persisted under the "db" directory.
# `db` is later used as the retriever of the conversational chain.
# NOTE(review): re-running this cell against an existing "db" directory may add the
# same chunks again — confirm and clear the directory if duplicates appear.
db = Chroma.from_documents(texts, embeddings, persist_directory="db")
# Smoke test: ask for the 2 nearest chunks; the cell output is the number returned.
results = db.similarity_search("an example of essay", k = 2)
len(results)

In [None]:
# End of the document-preparation part.

In [None]:
# Telegram bot token. Read it from the TELEGRAM_BOT_TOKEN environment variable so the
# secret does not have to be stored in the notebook; the original placeholder literal
# is kept only as a backward-compatible fallback.
TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN", 'your_token')

In [None]:
# Hugging Face repo id of the checkpoint used for answer generation.
MODEL_NAME = "TheBloke/Llama-2-13b-Chat-GPTQ"

# Tokenizer for MODEL_NAME; it is passed to the text-generation pipeline below.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

In [None]:
# Load the causal LM in float16 and let `device_map="auto"` decide weight placement.
# trust_remote_code=True allows code shipped with the model repo to run locally.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float16, trust_remote_code=True, device_map="auto"
)

# Start from the generation defaults published with the model, then override them.
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 1024
# Sampling is enabled but with a near-zero temperature.
generation_config.temperature = 0.0001
generation_config.top_p = 0.95
generation_config.do_sample = True
generation_config.repetition_penalty = 1.15

# transformers text-generation pipeline; wrapped for LangChain in the next cell.
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=generation_config,
)

In [None]:
# Wrap the transformers pipeline as a LangChain LLM; `llm` is what the
# ConversationalRetrievalChain in process_job is built from.
llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})

In [None]:
class ChatHistoryDB:
    """SQLite-backed store of prompt/answer pairs, keyed by chat id.

    Every operation opens its own short-lived connection to ``database_name``
    and always closes it, even when the statement fails. Deleting history is a
    soft delete: rows are flagged ``deleted = 1`` and hidden from reads.
    """

    def __init__(self, database_name):
        """Remember the database file name and make sure the schema exists."""
        self.db_name = database_name
        self.logger = logging.getLogger(__name__)
        self.create_tables()

    def _run(self, sql, params=(), fetch=False):
        """Execute one statement on a fresh connection.

        Returns the fetched rows when ``fetch`` is true, otherwise commits and
        returns ``None``. The connection is closed in all cases.
        """
        conn = sqlite3.connect(self.db_name)
        try:
            cursor = conn.cursor()
            cursor.execute(sql, params)
            if fetch:
                return cursor.fetchall()
            conn.commit()
            return None
        finally:
            conn.close()

    def create_tables(self):
        """Create the ``messages`` table if it does not exist yet."""
        self._run('''CREATE TABLE IF NOT EXISTS messages (
            message_id INTEGER PRIMARY KEY AUTOINCREMENT,
            chat_id INTEGER NOT NULL,
            user_prompt TEXT NOT NULL,
            answer TEXT NOT NULL,
            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            deleted INTEGER DEFAULT 0
        );''')
        self.logger.info(f"Database {self.db_name} opened")

    def insert_message(self, chat_id, user_prompt, answer):
        """Append one prompt/answer exchange to the history of ``chat_id``."""
        self._run("INSERT INTO messages (chat_id, user_prompt, answer) VALUES (?, ?, ?)",
                  (chat_id, user_prompt, answer))

    def get_chat_messages(self, chat_id, limit=5):
        """Return the latest ``limit`` non-deleted exchanges of ``chat_id``.

        The result is a list of ``(user_prompt, answer)`` tuples in
        chronological order (oldest of the selected rows first).
        """
        # Without ORDER BY the query returned the *first* rows of the chat, so the
        # "history" never advanced past the first `limit` messages. Select the
        # newest rows by descending id, then restore chronological order.
        rows = self._run(
            "SELECT user_prompt, answer FROM messages "
            "WHERE chat_id = ? AND deleted = 0 "
            "ORDER BY message_id DESC LIMIT ?",
            (chat_id, limit),
            fetch=True,
        )
        rows.reverse()
        return rows

    def delete_all_history(self, chat_id):
        """Soft-delete every stored exchange of ``chat_id``."""
        self._run("UPDATE messages SET deleted = 1 WHERE chat_id = ?", (chat_id,))

In [None]:
# Bot options. Inside a notebook they take their defaults; they can still be overridden
# from the command line when the code is run as a script.
parser = argparse.ArgumentParser(description='LLaMa telegram bot')
parser.add_argument("-f", "--fff", help="a dummy argument to fool ipython", default="1")
parser.add_argument('-t', '--threads', default=4, type=int, help='Number of threads to use')
parser.add_argument('--max-tokens', default=128, type=int, help='The maximum number of tokens to generate')
parser.add_argument('--enable-history', action='store_true', help='Simulate memory in a chatbot')
parser.add_argument('--skip-init-prompt', action='store_true', help='Skip the initial prompt (faster startup)')
parser.add_argument('--debug', action='store_true', help='Enable debug logging')

# parse_known_args() tolerates any extra arguments the kernel launcher leaves on
# sys.argv; parse_args() would terminate the kernel on the first one it does not know.
args, _unknown_args = parser.parse_known_args()

In [None]:
# Download the GGUF checkpoint from the Hugging Face Hub.
# `model_path` is passed to the llama.cpp `Llama(...)` constructor, and
# `model_basename` is shown to users by the /start and /help handler.
model_name_or_path = "TheBloke/CodeLlama-13B-Python-GGUF"
model_basename = "codellama-13b-python.Q5_K_M.gguf"
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

In [None]:
# Logging: --debug switches the log level from INFO to DEBUG.
log_level = logging.DEBUG if args.debug else logging.INFO
log_format = '%(asctime)s [%(levelname)s] %(message)s'
date_format = '%Y-%m-%d %H:%M:%S'

logging.basicConfig(level=log_level, format=log_format, datefmt=date_format)
logger = logging.getLogger(__name__)


# Telegram client and a random seed for the llama.cpp model.
bot = telebot.TeleBot(TOKEN)
seed = random.randint(1, sys.maxsize)

# Load the GGUF model; a ValueError during initialisation is fatal.
try:
    llama = Llama(model_path=model_path, n_ctx=512, n_gqa=8, seed=seed, n_threads=args.threads, verbose=True)
except ValueError as e:
    logger.error("Error while initializing LLaMa: "+str(e))
    sys.exit(1)

# Persistent chat history and the queue that hands requests to the worker thread.
historyDb = ChatHistoryDB("chat.db")
job_queue = queue.Queue()

# Prompt template pieces. The trailing space after "completes" matters: the two
# literals are concatenated, and without it the prompt read "completesthe request."
init_prompt = "Write a short response that appropriately completes " \
              "the request."
q_prompt = "### Instruction:"
a_prompt = "### Response:"


def process_job(job):
    """Generate an answer for one queued request and send it back to the user.

    Args:
        job: Sequence ``(user_prompt, chat_id, msg, custom_prompt)`` as put on
            ``job_queue`` by the message handlers. ``msg`` is the bot's
            "Please wait a moment" placeholder message, and ``custom_prompt``
            is truthy when the user's text must be used as the prompt verbatim.

    Errors raised while generating or sending are logged and reported to the
    user as a reply to ``msg``.
    """
    answer_marker = "Helpful Answer:"

    def generate_text(user_prompt, max_tokens=args.max_tokens, stream=False, custom_prompt=False, chat_id=None,
                      history= args.enable_history):
        """Build the prompt, run it through the retrieval chain, return the answer text."""
        if custom_prompt:
            # /raw mode: the user's text is the whole prompt.
            prompt = user_prompt
        elif args.skip_init_prompt:
            # Same "<marker> <text>" spacing as the full template and the stored history.
            prompt = f"{q_prompt} {user_prompt}\n{a_prompt}"
        else:
            prompt = f"{init_prompt}\n{q_prompt} {user_prompt}\n{a_prompt}"
        if history and chat_id:
            prompt = get_last_messages(chat_id) + prompt

        logger.debug(f"Generation for: {prompt}")

        # A fresh memory and chain are built per request; earlier turns reach the model
        # only through the history text prepended to the prompt above.
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        llm_chain = ConversationalRetrievalChain.from_llm(
            llm, retriever=db.as_retriever(search_kwargs={"k":2}), memory=memory, verbose=False)

        # max_tokens is only used for this warning; it is not passed to the chain.
        if stream and max_tokens > 2048:
            logger.warning("This is likely to exceed 4096 characters, which would not fit into one stream message")
        raw_answer = llm_chain.run(prompt)
        logger.debug(f"Raw chain output: {raw_answer}")

        # Keep only what follows the marker. When the marker is missing, fall back to
        # the whole output instead of failing on an unassigned variable.
        marker_pos = raw_answer.find(answer_marker)
        if marker_pos == -1:
            return raw_answer.strip()
        return raw_answer[marker_pos + len(answer_marker):].strip()

    user_prompt, chat_id, msg, custom_prompt = job[:4]

    try:
        bot.edit_message_text(chat_id=msg.chat.id, text="Started to generate text for you...",
                              message_id=msg.message_id)
        logger.info(f"Generating text for user {msg.chat.username}")

        text_to_user = generate_text(user_prompt, chat_id=chat_id, stream=False,
                                     custom_prompt=custom_prompt)
        logger.debug(json.dumps(text_to_user, indent=2))

        send_by_chunks(msg, text_to_user)
        logger.info(f"Sent to user {msg.chat.username}: {text_to_user}")

        if args.enable_history:
            historyDb.insert_message(chat_id, user_prompt, text_to_user)
        bot.delete_message(msg.chat.id, msg.message_id)  # delete 'please wait a moment'
    except OSError as e:
        logger.exception("OSError while processing a job")
        bot.reply_to(msg, f"OSError: {e}")
    except Exception as e:
        logger.exception("Error while processing a job")
        bot.reply_to(msg, f"Error: {e}")


def process_queue():
    """Worker loop: take jobs from ``job_queue`` forever and process them one at a time.

    A failing job is logged and does not stop the loop. Every job taken from
    the queue is marked done, whether it succeeded or not, so the queue's
    unfinished-task count stays accurate.
    """
    while True:
        job = job_queue.get()
        start_time = time.time()
        try:
            process_job(job)
            elapsed_time = time.time() - start_time
            # ".2f" = two decimals; the previous "02f" spec printed six.
            logger.info(f"Job processed in {elapsed_time:.2f} seconds")
        except Exception as e:
            logger.exception(f"Error: {e}")
        finally:
            job_queue.task_done()


def send_by_chunks(message, text, **kwargs):
    """Send ``text`` to the chat of ``message``, split into 4096-character pieces.

    Text of up to 4096 characters is sent as one message; longer text is sent
    as consecutive replies to ``message``. Extra keyword arguments are passed
    through to the bot call.

    Raises:
        ValueError: if ``text`` is ``None``, empty, or whitespace only. Such
            text cannot be sent, so it is rejected before calling the API.
    """
    max_len = 4096
    if text is None or not text.strip():
        logger.error("Message is empty or too short")
        raise ValueError("Message is empty")
    if len(text) < 5:
        # Suspiciously short, but still deliverable: log it and send anyway.
        logger.error("Message is empty or too short")
    if len(text) <= max_len:
        bot.send_message(message.chat.id, text, **kwargs)
        return
    for start in range(0, len(text), max_len):
        bot.reply_to(message, text[start:start + max_len], **kwargs)


def get_last_messages(chat_id):
    """Render the stored exchanges of ``chat_id`` as one prompt-formatted string.

    Each exchange becomes an instruction line followed by a response line;
    the result is an empty string when nothing is stored.
    """
    rows = historyDb.get_chat_messages(chat_id)
    return "".join(
        f"{q_prompt} {question}\n {a_prompt} {reply}\n"
        for question, reply in rows
    )


@bot.message_handler(commands=['history'])
def history_command(message):
    """Handle /history: show the stored history with a "Delete history" button."""
    keyboard = types.InlineKeyboardMarkup()
    button = types.InlineKeyboardButton(text="Delete history", callback_data="remove_history")
    keyboard.add(button)
    history = get_last_messages(message.chat.id)
    if history == "":
        history = "You have no history with the bot"
    # Stored answers can push the history past the 4096-character limit of a single
    # message, so send it through send_by_chunks, which splits long text.
    send_by_chunks(message, history, reply_markup=keyboard)


@bot.callback_query_handler(func=lambda call: call.data == 'remove_history')
def send_message_callback(call):
    """Handle the "Delete history" button: forget the chat's history and confirm."""
    historyDb.delete_all_history(call.message.chat.id)
    # Acknowledge the button press; until a callback query is answered the client
    # keeps showing a progress indicator on the button.
    bot.answer_callback_query(call.id)
    bot.send_message(call.message.chat.id, "Chat history successfully forgotten")


@bot.message_handler(commands=['start', 'help'])
def start_command(message):
    """Handle /start and /help: reply with the usage text and the model file name."""
    usage_parts = (
        "Hello. This is chatGPT bot based on LLaMa.\nUsage:",
        "\n\n<text> - Q&A mode.\n",
        "/raw <prompt> - Use your own prompt.\n",
        "/history - show history and delete it\n\n",
        f"Current model: {model_basename}",
    )
    bot.reply_to(message, "".join(usage_parts))


@bot.message_handler(commands=['raw'])
def raw_command(message):
    """Handle /raw: queue the text after the command as a verbatim prompt.

    Replies with a usage hint, and queues nothing, when no prompt follows the
    command.
    """
    # Drop the first whitespace-delimited token ("/raw" or "/raw@BotName"); the rest
    # is the prompt. Replacing the literal "/raw " missed a bare "/raw", the @BotName
    # form and a newline after the command, and queued the command text itself.
    parts = message.text.split(maxsplit=1)
    user_prompt = parts[1] if len(parts) > 1 else ""
    if not user_prompt.strip():
        bot.reply_to(message, "Usage: /raw <prompt>")
        return

    msg = bot.reply_to(message, f"Please wait a moment. Current queue: {job_queue.qsize()}")
    bot.send_chat_action(chat_id=message.chat.id, action='typing')
    job_queue.put((user_prompt, message.chat.id, msg, True))
    logger.info("Added a new raw task from user: %s (%s), text: %s", message.chat.username, message.chat.id,
                user_prompt)


@bot.message_handler(func=lambda message: True)
def main(message):
    """Catch-all handler: queue plain text as a Q&A request, reject unknown commands."""
    text = message.text
    if text.startswith("/"):
        bot.reply_to(message, "Wrong command")
        return

    chat = message.chat
    # The reply doubles as the placeholder that the worker edits and later deletes.
    msg = bot.reply_to(message, f"Please wait a moment. Current queue: {job_queue.qsize()}")
    bot.send_chat_action(chat_id=chat.id, action='typing')
    job_queue.put((text, chat.id, msg, False))
    logger.info("Added a new task from user: %s (%s), text: %s", chat.username, chat.id, text)


# Run the queue consumer on a background thread. It is a daemon thread, so it does
# not keep the process alive once the main thread exits.
t = threading.Thread(target=process_queue)
t.daemon = True
t.start()

# Start receiving Telegram updates.
# NOTE(review): presumably this call blocks until interrupted, so later cells will not
# run while the bot is polling — confirm.
bot.infinity_polling()

In [None]:
# List the installed package versions of the current environment.
!pip freeze