In [None]:
!pip install transformers
!pip install tensorboardX

# Creating the model

In [2]:
import torch

import transformers
from transformers import (WEIGHTS_NAME,BertConfig, BertForMaskedLM, BertTokenizer)

class EmbeddingGenerator:
    """Turn a piece of text into a single fixed-size embedding vector.

    The vector is the element-wise mean, over every token position
    (including the special tokens added by the tokenizer), of the last
    entry in the hidden states returned by the BERT masked-LM model.
    """

    def __init__(self, config):
        """Load tokenizer and model, and move the model to the best device.

        Args:
            config: Dict-like settings, or None. Recognised key:
                "model_name" - pretrained checkpoint to load
                (default "bert-base-uncased").
        """
        config = config or {}
        model_name = config.get("model_name", "bert-base-uncased")

        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"using device: {self.device}")

        model = BertForMaskedLM.from_pretrained(model_name, output_hidden_states=True)
        # To serve fine-tuned weights instead, load them here, e.g.:
        # model.load_state_dict(torch.load("./model/pytorch_model.bin"))
        model.to(self.device)
        # Explicitly select inference mode (dropout disabled) so the same text
        # always maps to the same vector, however the weights were loaded.
        model.eval()
        self.model = model

    def predict(self, text):
        """Return the mean last-layer hidden state for ``text``.

        Args:
            text: Input passed to ``self.tokenizer.encode``.

        Returns:
            list[float]: One built-in Python float per hidden dimension,
            so the result can be JSON-serialised directly.
        """
        token_ids = self.tokenizer.encode(text, add_special_tokens=True)
        # The input must live on the same device as the model, otherwise the
        # forward pass raises a device-mismatch error when running on GPU.
        input_ids = torch.tensor(token_ids).unsqueeze(0).to(self.device)

        # Pure inference: skip building the autograd graph.
        with torch.no_grad():
            outputs = self.model(input_ids)

        # outputs[1] holds the per-layer hidden states (requested through
        # output_hidden_states=True); take the last one and drop the batch
        # dimension, leaving a (tokens, hidden) tensor.
        last_hidden = outputs[1][-1][0]

        # Average over the token axis on-device; tolist() copies the result to
        # host memory and yields plain Python floats.
        return last_hidden.mean(dim=0).tolist()

# Creating the API

In [None]:
!pip install flask-ngrok

In [None]:
from flask_ngrok import run_with_ngrok
from flask import Flask, jsonify, request
import json

# Create the Flask application that serves the embedding API.
app = Flask(__name__)
run_with_ngrok(app)   # starts ngrok when the app is run

# Build the model wrapper once, at cell execution time, so every request
# reuses the same instance. EmbeddingGenerator is defined in the model cell
# above, which must have been run first.
predictor = EmbeddingGenerator({})


@app.route("/embedding", methods=['POST'])
def predict():
    """Return the embedding vector for the text posted as JSON.

    Expects a JSON object with a "context" field and responds with
    {"vector": [...]}. A body that is not a JSON object, or that lacks
    "context", gets a 400 response with an "error" message instead of an
    unhandled exception.
    """
    # silent=True yields None for a missing or malformed JSON body rather than
    # raising, so all bad-input cases are handled by the single check below.
    posted_data = request.get_json(silent=True)
    if not isinstance(posted_data, dict) or 'context' not in posted_data:
        return jsonify({
            "error": "request body must be a JSON object with a 'context' field",
        }), 400

    result = predictor.predict(posted_data['context'])

    return jsonify({
        # Coerce to built-in floats: NumPy scalar types such as float32 are
        # not JSON serialisable.
        "vector": [float(value) for value in result],
    })

@app.route("/")
def home():
    """Serve a small HTML banner at the root URL."""
    banner = "<h1>Running Flask on Google Colab!</h1>"
    return banner
  
# Start serving the routes registered above; run_with_ngrok(app) was applied
# to this app earlier in the cell.
app.run()