In [None]:

!pip install --upgrade urllib3
!pip install flask scikit-learn PyMuPDF pyngrok sentence-transformers

import os
import fitz  # PyMuPDF
from flask import Flask, request, jsonify, render_template_string
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import threading
from pyngrok import ngrok
import socket
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize


def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The page texts concatenated in page order, or ``None`` if the
        document could not be opened or read (the error is printed).
    """
    try:
        # The context manager closes the document even when a page fails to
        # load, so the underlying file handle is never leaked.
        with fitz.open(pdf_path) as pdf_document:
            return "".join(page.get_text() for page in pdf_document)
    except Exception as e:
        # Best-effort by design: callers check for None instead of catching.
        print(f"Error reading PDF: {e}")
        return None

# Source document that the chatbot answers questions from.
pdf_path = '/content/PYTORCH DOCUMENTATION1.pdf'  # Ensure the PDF is in the correct path
pytorch_text = extract_text_from_pdf(pdf_path)

if pytorch_text is None:
    raise ValueError("Failed to extract text from the PDF document.")

# Split the document into sentences; each sentence is one retrievable unit.
sentences = sent_tokenize(pytorch_text)

# A PDF without a text layer (e.g. scanned pages) yields no sentences. Fail
# here with a clear message instead of letting every later query fail inside
# the similarity search.
if not sentences:
    raise ValueError("No sentences could be extracted from the PDF document.")

# Sentence-embedding model used for both the corpus and incoming queries.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Corpus embeddings are computed once up front and reused for every query.
sentence_embeddings = model.encode(sentences)

def get_response(query):
    """Answer a query with the most similar sentences from the document.

    The query is embedded with the same model as the corpus, the ten
    highest-scoring sentences by cosine similarity are selected, repeated
    sentences are dropped, and at most five of the remainder are returned
    joined by single spaces.

    Args:
        query: The user's question.

    Returns:
        The joined sentences, or a fixed apology string if anything fails
        (the error is printed).
    """
    try:
        query_vector = model.encode([query])
        scores = cosine_similarity(query_vector, sentence_embeddings).flatten()

        # argsort is ascending: take the last ten and reverse for best-first.
        best_first = scores.argsort()[-10:][::-1]

        # Order-preserving de-duplication of the candidate sentences.
        seen = set()
        unique_sentences = []
        for idx in best_first:
            candidate = sentences[idx]
            if candidate in seen:
                continue
            seen.add(candidate)
            unique_sentences.append(candidate)

        top_five = unique_sentences[:5]
        return " ".join(top_five)
    except Exception as e:
        print(f"Error in generating response: {e}")
        return "Sorry, I encountered an error while processing your request."

# Flask application object; the route handlers in this file register on it
# via @app.route, and app.run() serves it.
app = Flask(__name__)

@app.route('/')
def home():
    """Serve the landing page: a single-field form that POSTs to /ask."""
    landing_page = '''
        <!doctype html>
        <html lang="en">
          <head>
            <meta charset="utf-8">
            <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
            <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css">
            <title>PyTorch Documentation Chatbot</title>
            <style>
              body {
                background-color: #f8f9fa;
                padding-top: 50px;
              }
              .container {
                max-width: 600px;
              }
              .card {
                margin-top: 20px;
              }
              .card-header {
                background-color: #007bff;
                color: white;
              }
            </style>
          </head>
          <body>
            <div class="container">
              <div class="card">
                <div class="card-header">
                  <h3 class="card-title">PyTorch Documentation Chatbot</h3>
                </div>
                <div class="card-body">
                  <form action="/ask" method="post">
                    <div class="form-group">
                      <label for="message">Ask a Question:</label>
                      <input type="text" class="form-control" id="message" name="message" placeholder="Enter your question here" required>
                    </div>
                    <button type="submit" class="btn btn-primary btn-block">Ask</button>
                  </form>
                </div>
              </div>
            </div>
            <script src="https://code.jquery.com/jquery-3.3.1.slim.min.js"></script>
            <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js"></script>
            <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js"></script>
          </body>
        </html>
    '''
    return render_template_string(landing_page)

@app.route('/ask', methods=['POST'])
def ask():
    """Handle the HTML form submission and render the answer page.

    Reads the ``message`` form field, looks up an answer with
    ``get_response`` and renders it into the response template.

    Returns:
        The rendered answer page on success; a plain-text message with
        HTTP 400 when the query is missing or blank; a generic plain-text
        message with HTTP 500 if rendering fails.
    """
    # Treat a whitespace-only submission the same as a missing one.
    user_input = (request.form.get('message') or '').strip()
    if not user_input:
        return "Please provide a valid query.", 400
    try:
        response = get_response(user_input)
        return render_template_string('''
            <!doctype html>
            <html lang="en">
              <head>
                <meta charset="utf-8">
                <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
                <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css">
                <title>PyTorch Documentation Chatbot</title>
                <style>
                  body {
                    background-color: #f8f9fa;
                    padding-top: 50px;
                  }
                  .container {
                    max-width: 600px;
                  }
                  .card {
                    margin-top: 20px;
                  }
                  .card-header {
                    background-color: #007bff;
                    color: white;
                  }
                </style>
              </head>
              <body>
                <div class="container">
                  <div class="card">
                    <div class="card-header">
                      <h3 class="card-title">PyTorch Documentation Chatbot</h3>
                    </div>
                    <div class="card-body">
                      <h4>Response:</h4>
                      <p>{{ response }}</p>
                      <a href="/" class="btn btn-primary btn-block">Ask another question</a>
                    </div>
                  </div>
                </div>
                <script src="https://code.jquery.com/jquery-3.3.1.slim.min.js"></script>
                <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js"></script>
                <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js"></script>
              </body>
            </html>
        ''', response=response)
    except Exception:
        # Log the full traceback server-side, but do not echo exception text
        # to the client: it can expose internals and would be sent unescaped.
        app.logger.exception("Error in /ask route")
        return "Internal Server Error", 500

@app.route('/chat', methods=['POST'])
def chat():
    """JSON API: answer the question given in the request body.

    Expects a JSON object with a string ``message`` field and replies with
    ``{"response": <text>}``.

    Returns:
        HTTP 200 with the answer on success; HTTP 400 when the body is not
        a JSON object or ``message`` is missing, blank or not a string;
        HTTP 500 with a generic message on an unexpected failure.
    """
    # silent=True yields None for a missing, malformed or non-JSON body
    # instead of raising, so bad input is reported as a client error.
    payload = request.get_json(silent=True)
    user_input = payload.get('message') if isinstance(payload, dict) else None
    if not isinstance(user_input, str) or not user_input.strip():
        return jsonify({"response": "Please provide a valid query."}), 400
    try:
        response = get_response(user_input.strip())
        return jsonify({"response": response})
    except Exception:
        # Keep exception details in the server log only.
        app.logger.exception("Error in /chat route")
        return jsonify({"response": "Internal Server Error"}), 500

def find_free_port():
    """Return a TCP port number that was unused at the time of the call.

    Binding to port 0 lets the operating system choose a free port; the
    chosen number is read back from the socket before it is closed.

    Returns:
        The port number as an int.

    Note:
        The port is released before this function returns, so another
        process could claim it before the caller binds to it.
    """
    # The context manager closes the socket even if bind() raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.bind(('', 0))
        return probe.getsockname()[1]

# Port shared by the Flask server and the ngrok tunnel.
port = find_free_port()


def run():
    """Run the Flask development server (blocking) on the chosen port."""
    app.run(host='0.0.0.0', port=port)


# Serve from a background thread so the rest of the cell can go on to open
# the tunnel while the server keeps running.
thread = threading.Thread(target=run)
thread.start()

# SECURITY: the ngrok auth token must not be committed to source. It is read
# from the NGROK_AUTHTOKEN environment variable instead; when the variable is
# unset, ngrok falls back to any token already saved in its own config file.
# The token that was previously hard-coded here should be revoked.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN")
if ngrok_auth_token:
    ngrok.set_auth_token(ngrok_auth_token)

# ngrok.connect returns an NgrokTunnel object; print its public_url attribute
# rather than the object itself so the log line shows just the URL.
public_url = ngrok.connect(port)
print(" * ngrok tunnel \"{}\" -> \"http://127.0.0.1:{}\"".format(public_url.public_url, port))


Collecting urllib3
  Downloading urllib3-2.2.2-py3-none-any.whl (121 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.4/121.4 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: urllib3
  Attempting uninstall: urllib3
    Found existing installation: urllib3 2.0.7
    Uninstalling urllib3-2.0.7:
      Successfully uninstalled urllib3-2.0.7
Successfully installed urllib3-2.2.2
Collecting PyMuPDF
  Downloading PyMuPDF-1.24.7-cp310-none-manylinux2014_x86_64.whl (3.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.5/3.5 MB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyngrok
  Downloading pyngrok-7.1.6-py3-none-any.whl (22 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1 kB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m
Collecting PyMuPDFb==1.24.6 (from PyMuPDF)
  Download

  from tqdm.autonotebook import tqdm, trange
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:45301
 * Running on http://172.28.0.12:45301
INFO:werkzeug:[33mPress CTRL+C to quit[0m


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
 * ngrok tunnel "NgrokTunnel: "https://91b7-35-221-255-197.ngrok-free.app" -> "http://localhost:45301"" -> "http://127.0.0.1:45301"
