**Set up the Hugging Face access token as an environment variable**

In [None]:
# Read the Hugging Face access token from Colab's secret store.
import os

from google.colab import userdata

# Secrets are managed behind the key icon in Colab's left sidebar; create one
# named HF_TOKEN that holds your Hugging Face access token.
HF_TOKEN = userdata.get("HF_TOKEN")

# Publish the token through the process environment under HUGGINGFACE_TOKEN.
os.environ["HUGGINGFACE_TOKEN"] = HF_TOKEN

**Declaring Required Libraries (requirements.txt)**

In [None]:
%%writefile requirements.txt
# Dependencies installed later with `pip install -r requirements.txt`.
# Provides AutoTokenizer / AutoModelForCausalLM used by models.py.
transformers
# Web framework serving the /predict endpoint in app.py.
flask
# Python wrapper around ngrok, used to open the public tunnel to the app.
pyngrok
# Not referenced directly in the visible cells; presumably a transformers
# companion for model loading / device placement -- TODO confirm it is needed.
accelerate
# Tensor backend; models.py requests PyTorch tensors (return_tensors="pt").
torch

Overwriting requirements.txt


**Model Loader**

In [None]:
%%writefile models.py
import os
from transformers import AutoTokenizer, AutoModelForCausalLM

class ModelLoader:
    """Load the language models served by the API together with their tokenizers.

    Attributes:
        models: Maps each short model name to its loaded model.
        tokenizers: Maps each short model name to the matching tokenizer.
    """

    # Short name used by callers -> Hugging Face Hub repository id.
    # Keeping a single registry guarantees models and tokenizers stay in sync.
    # NOTE(review): every entry is loaded with AutoModelForCausalLM; confirm that
    # "atlasia/Terjman-Ultra" is a causal LM and not a seq2seq translation model.
    MODEL_IDS = {
        "BSJCode-1-Stable": "BSAtlas/BSJCode-1-Stable",
        "CodeLlama": "codellama/CodeLlama-7b-Instruct-hf",
        "Terjman": "atlasia/Terjman-Ultra",
    }

    def __init__(self):
        """Eagerly load every model, then every tokenizer, listed in MODEL_IDS.

        Raises:
            ValueError: If the HUGGINGFACE_TOKEN environment variable is not set.
        """
        self.models = {
            name: self.load_model(repo_id)
            for name, repo_id in self.MODEL_IDS.items()
        }
        # Tokenizers are fetched with the same credentials as the models so that
        # private or gated repositories work for both.
        token = self._get_token()
        self.tokenizers = {
            name: AutoTokenizer.from_pretrained(repo_id, token=token)
            for name, repo_id in self.MODEL_IDS.items()
        }

    @staticmethod
    def _get_token():
        """Return the token stored in HUGGINGFACE_TOKEN, or raise ValueError."""
        huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
        if not huggingface_token:
            raise ValueError("Hugging Face token is not set. Please configure it.")
        return huggingface_token

    def load_model(self, model_name):
        """Load a causal language model from the Hugging Face Hub.

        Args:
            model_name: Hub repository id, e.g. "codellama/CodeLlama-7b-Instruct-hf".

        Returns:
            The model returned by AutoModelForCausalLM.from_pretrained.

        Raises:
            ValueError: If the HUGGINGFACE_TOKEN environment variable is not set.
        """
        huggingface_token = self._get_token()
        print(f"Loading model: {model_name}")
        # `token` is the current keyword; `use_auth_token` is deprecated.
        return AutoModelForCausalLM.from_pretrained(model_name, token=huggingface_token)

    def generate_response(self, model_name, input_text, max_new_tokens=100):
        """Generate text with one of the loaded models.

        Args:
            model_name: Short name of a loaded model (a key of ``self.models``).
            input_text: Prompt to feed to the model.
            max_new_tokens: Upper bound on the number of tokens generated on top
                of the prompt.

        Returns:
            The first generated sequence decoded to a string, with special
            tokens removed.

        Raises:
            ValueError: If ``model_name`` is not a loaded model.
        """
        if model_name not in self.models:
            raise ValueError(f"Model {model_name} not loaded")

        tokenizer = self.tokenizers[model_name]
        model = self.models[model_name]

        inputs = tokenizer(input_text, return_tensors="pt")
        # max_new_tokens (rather than max_length) keeps the budget independent of
        # the prompt length, so long prompts -- including the output of a previous
        # model fed back in as a prompt -- still leave room for generation.
        # Passing the attention mask explicitly avoids relying on generate()
        # to infer it from the padding token.
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs.get("attention_mask"),
            max_new_tokens=max_new_tokens,
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True)


Overwriting models.py


**Flask API**

In [None]:
%%writefile app.py
from flask import Flask, request, jsonify
from models import ModelLoader

# Flask application object; the /predict route below is registered on it.
app = Flask(__name__)
# Constructing ModelLoader here runs at import time, so every model and
# tokenizer is loaded before the server starts handling requests.
model_loader = ModelLoader()

@app.route('/predict', methods=['POST'])
def predict():
    """Run the user's text through the model pipeline of the requested service.

    Expects a JSON object body with:
        input:   non-empty string to process (required).
        service: "BS-friendly" (default), "Pro" or "Premium".

    Returns:
        200 with {"output": <text>} on success,
        400 with {"error": <message>} for a malformed body, missing input or
            unknown service,
        500 with {"error": <message>} if a model fails while generating.
    """
    # Ordered model stages per service tier; each stage's output is the next
    # stage's input.
    pipelines = {
        "BS-friendly": ("BSJCode-1-Stable",),
        "Pro": ("CodeLlama",),
        # BS model first, CodeLlama refines its output, Terjman then clarifies
        # the result in Darija.
        "Premium": ("BSJCode-1-Stable", "CodeLlama", "Terjman"),
    }

    # silent=True yields None for a missing, non-JSON or unparsable body instead
    # of raising, so client mistakes are reported as 400 rather than 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    user_input = data.get("input", "")
    service_type = data.get("service", "BS-friendly")

    # Reject empty and non-string input up front; the tokenizer needs text.
    if not isinstance(user_input, str) or not user_input:
        return jsonify({"error": "Input text is required"}), 400

    # The isinstance guard keeps unhashable JSON values (lists, objects) from
    # raising during the dictionary lookup.
    stages = pipelines.get(service_type) if isinstance(service_type, str) else None
    if stages is None:
        return jsonify({"error": "Invalid service type"}), 400

    try:
        response = user_input
        for model_name in stages:
            response = model_loader.generate_response(model_name, response)
        return jsonify({"output": response}), 200
    except Exception as e:
        # Keep the traceback in the server log; the client only gets the message.
        app.logger.exception("Prediction failed for service %r", service_type)
        return jsonify({"error": str(e)}), 500

# Start Flask's built-in server only when this file is executed as a script
# (not when it is imported), listening on all interfaces on port 5000.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)

Overwriting app.py


**Install the dependencies**

In [None]:
!pip install -r requirements.txt




**Exposing the end-point**

In [None]:
import os

from google.colab import userdata

# Secrets are managed behind the key icon in Colab's left sidebar; create one
# named NG_TOKEN that holds your ngrok auth token.
NG_TOKEN = userdata.get('NG_TOKEN')
os.environ['NG_TOKEN'] = NG_TOKEN

# Never print the token itself: cell outputs are saved with the notebook and
# would leak the secret to anyone the notebook is shared with.
print("ngrok auth token loaded")

In [None]:
import socket
import subprocess
import sys
import time

from pyngrok import ngrok

APP_PORT = 5000            # must match the port passed to app.run() in app.py
APP_LOG_PATH = "app.log"   # server output goes here so it can be inspected
STARTUP_TIMEOUT_S = 1800   # model download/loading can take a long time
POLL_INTERVAL_S = 2

# Start the Flask app in the background. Output is redirected to a file rather
# than to pipes: pipes that nobody reads fill up and block the child process.
# sys.executable runs the app with the same interpreter as this kernel.
with open(APP_LOG_PATH, "w") as app_log:
    process = subprocess.Popen(
        [sys.executable, "app.py"],
        stdout=app_log,
        stderr=subprocess.STDOUT,
    )

# Wait until the app accepts connections, failing fast if it exits early.
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while True:
    if process.poll() is not None:
        raise RuntimeError(
            f"app.py exited with code {process.returncode}; see {APP_LOG_PATH}"
        )
    try:
        with socket.create_connection(("127.0.0.1", APP_PORT), timeout=1):
            break
    except OSError:
        if time.monotonic() > deadline:
            process.terminate()
            raise TimeoutError(
                f"app.py did not open port {APP_PORT} within "
                f"{STARTUP_TIMEOUT_S} seconds; see {APP_LOG_PATH}"
            )
        time.sleep(POLL_INTERVAL_S)

# Open the public tunnel only once the app is actually reachable.
ngrok.set_auth_token(NG_TOKEN)
public_url = ngrok.connect(APP_PORT)
print(f"Flask app is running at: {public_url}")