In [None]:
# Environment setup: install the serving (flask-ngrok), NLP (spacy,
# spacy-transformers) and nearest-neighbour (annoy) packages, download the
# spaCy model that build_index() loads, and unpack the dataset archive.
# NOTE(review): none of the installs are version-pinned, and the model name
# below looks like a spaCy 2.x / spacy-transformers 0.x package -- confirm
# and pin compatible versions so a fresh run resolves the same stack.
!pip install flask-ngrok
!pip install spacy
!pip install --user spacy-transformers
!pip install annoy
!python -m spacy download en_trf_distilbertbaseuncased_lg
# Presumably extracts JEOPARDY_CSV.csv, which build_index() opens from
# /content -- confirm the working directory matches.
!unzip JEOPARDY_CSV.zip

In [None]:
import spacy, csv, re, tqdm, time
from annoy import AnnoyIndex


def _shared_pipeline():
    """Return the notebook-wide spaCy pipeline, loading it on first use.

    Reuses a module-level ``nlp`` if one already exists; otherwise loads the
    model once and stores it under that name so later calls do not reload it.
    """
    global nlp
    try:
        return nlp
    except NameError:
        # Must be the same model that produced the indexed vectors, otherwise
        # query and item embeddings are not comparable.
        nlp = spacy.load("en_trf_distilbertbaseuncased_lg")
        return nlp


def get_nearest_texts(query, index, dataset, n_neighbours=10, nlp=None):
    """Return the dataset entries closest to ``query`` in embedding space.

    Args:
        query: Text to embed and search for.
        index: Object exposing ``get_nns_by_vector(vector, n)`` (an
            ``AnnoyIndex`` in this notebook) that returns item ids.
        dataset: Sequence indexed by the ids stored in ``index``.
        n_neighbours: Number of neighbours requested from ``index``.
        nlp: Optional callable mapping text to an object with a ``.vector``
            attribute. When omitted, the shared spaCy pipeline is used.

    Returns:
        A list of ``dataset`` entries in the order the index returned them,
        with repeated ids collapsed to their first occurrence.
    """
    pipeline = nlp if nlp is not None else _shared_pipeline()
    neighbour_ids = index.get_nns_by_vector(pipeline(query).vector, n_neighbours)
    # dict.fromkeys drops duplicate ids but, unlike set(), keeps the order in
    # which the index ranked them.
    return [dataset[item_id] for item_id in dict.fromkeys(neighbour_ids)]


def build_index(csv_path='/content/JEOPARDY_CSV.csv', shard_size=1000,
                model_name="en_trf_distilbertbaseuncased_lg"):
  """Load the question/answer CSV and embed the first ``shard_size`` rows.

  Args:
    csv_path: CSV file with ``Question`` and ``Answer`` columns.
    shard_size: Number of leading rows to embed; ``None`` embeds every row.
    model_name: spaCy model used to produce the embeddings.

  Returns:
    ``(vectors, dataset)`` where ``dataset`` is a list of
    ``(question, answer)`` tuples for every CSV row (HTML-like tags removed)
    and ``vectors`` is a list of ``(embedding, row_index)`` pairs -- two per
    embedded row, the question embedding followed by the answer embedding.

  Side effects:
    Stores the loaded pipeline in the module-level name ``nlp`` so that
    query-time code embeds text with the same model.
  """
  # get_nearest_texts() reads `nlp` as a global; keeping the pipeline local
  # to this function left that name undefined at query time.
  global nlp
  nlp = spacy.load(model_name)
  tag_cleaner = re.compile('<.*?>')

  # newline='' is what the csv module documents for file objects, so quoted
  # fields containing line breaks are parsed correctly.
  with open(csv_path, encoding='utf8', newline='') as csvfile:
      reader = csv.DictReader(csvfile)
      # Tolerate padded header names (e.g. ' Question') as well as exact ones.
      reader.fieldnames = [name.strip() for name in (reader.fieldnames or [])]
      dataset = [
          (tag_cleaner.sub('', row['Question']), tag_cleaner.sub('', row['Answer']))
          for row in reader
      ]

  vectors = []
  for i, (question, answer) in enumerate(tqdm.tqdm(dataset[:shard_size])):
      vectors.append((nlp(question).vector, i))
      vectors.append((nlp(answer).vector, i))

  return vectors, dataset


def get_answers(q, n, vectors, dataset):
  """Return up to ``n`` dataset rows whose question or answer is nearest ``q``.

  Args:
    q: Query text.
    n: Number of neighbours to request from the index.
    vectors: List of ``(embedding, row_index)`` pairs as produced by
      ``build_index``; several pairs may share a ``row_index``.
    dataset: Sequence of rows addressed by ``row_index``.

  Returns:
    A list of ``dataset`` rows, each row at most once. It can be shorter than
    ``n`` when several of the nearest vectors belong to the same row. Empty
    when ``vectors`` is empty.

  NOTE: the Annoy index is rebuilt on every call; cache it if request
  latency matters.
  """
  if not vectors:
    return []

  n_dimensions = len(vectors[0][0])
  n_trees = 50

  index = AnnoyIndex(n_dimensions, 'angular')
  # Annoy addresses items by id, so adding a row's question and answer
  # vectors under the same row index keeps only the last one added. Give
  # every vector its own id and remember which row owns it instead.
  for item_id, (vec, _row) in enumerate(vectors):
      index.add_item(item_id, vec)

  index.build(n_trees)

  # owners[item_id] is the dataset row behind that vector. Passing it as the
  # lookup table makes get_nearest_texts hand back row indices.
  owners = [row for _vec, row in vectors]
  hit_rows = get_nearest_texts(q, index, owners, n_neighbours=n)
  # A row can match through both of its vectors; report it once, first hit wins.
  return [dataset[row] for row in dict.fromkeys(hit_rows)]

# Embed the first shard once when this cell runs (the recorded run below took
# about 1m46s for 1000 rows). `vectors` and `dataset` are then read as
# module-level globals by the Flask view in the next cell.
vectors, dataset = build_index()

100%|██████████| 1000/1000 [01:46<00:00,  9.43it/s]


In [None]:
from flask_ngrok import run_with_ngrok
from flask import Flask, render_template, request
from flask import Flask
import requests


# Flask application; templates (index.html) are looked up in /content/templates.
app = Flask(__name__, template_folder='/content/templates')
run_with_ngrok(app)   # starts ngrok when the app is run


@app.route('/', methods=['GET', 'POST'])
def index():
  """Render the search page and, on POST, run a nearest-neighbour query.

  The form field ``url`` is expected to hold ``"<query text>, <n>"``. The
  text is split on the last comma so the query itself may contain commas.

  Returns:
    The rendered ``index.html`` template with ``errors`` (list of messages)
    and ``results`` (empty dict when nothing was searched, otherwise the list
    returned by ``get_answers``).
  """
  errors = []
  results = {}
  if request.method == "POST":
    raw_query = request.form.get('url', '')
    try:
      # Split on the LAST comma only: the part before it is the query text.
      q, n = raw_query.rsplit(',', 1)
      q, n = q.strip(), int(n)
      if not q or n < 1:
        raise ValueError("empty query or non-positive result count")
    except ValueError:
      # Only malformed user input is reported as an invalid query.
      errors.append("Please enter a valid query in format: " + 'Query, n_qs')
    else:
      try:
        results = get_answers(q, n, vectors, dataset)
      except Exception:
        # Keep the page alive, but record the real failure instead of
        # blaming the user's input for a server-side error.
        app.logger.exception("Search failed for query %r", q)
        errors.append("Search failed on the server; see the server log for details.")
  return render_template('index.html', errors=errors, results=results)
  
# Launch the app; because run_with_ngrok(app) was applied above, ngrok is
# started together with it.
app.run()