# Setup

In [8]:
import pandas as pd
from google.colab import drive
import joblib
import re
from tensorflow import keras
import nltk
from nltk.corpus import stopwords
# Fetch the NLTK stopword corpus that process_text() reads via `stopwords.words`.
nltk.download('stopwords')

# Content located at https://drive.google.com/drive/folders/1JwGMvRrlptyIfmMSezMPYXGPubGuwG-4?usp=sharing
# Mount Google Drive inside the Colab runtime so the project artifacts are
# reachable as ordinary files under /content/drive.
drive.mount("/content/drive")
# Directory prefix for every artifact loaded below. The trailing slash matters:
# file names are appended to it directly inside f-strings.
filepath = "/content/drive/MyDrive/data/final_project/"


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Functions

In [48]:
def load_objects():
  """Load the saved model and the three pickled transformers from ``filepath``.

  The Keras model is read from the ``model`` entry under ``filepath``; the
  title matrix, body matrix and tfidf objects are read with ``joblib.load``
  from their ``.pkl`` files in the same directory.

  Returns:
    A 4-tuple ``(model, matrix_title, matrix_body, tfidf)``, in that order.
  """
  # NOTE(review): joblib.load unpickles arbitrary objects; only point
  # `filepath` at a trusted location.
  artifacts = (
      keras.models.load_model(f"{filepath}model"),
      joblib.load(f"{filepath}matrix_title.pkl"),
      joblib.load(f"{filepath}matrix_body.pkl"),
      joblib.load(f"{filepath}tfidf.pkl"),
  )
  return artifacts


def process_text(text):
    """Normalize raw question text before it is vectorized.

    Steps, in order:
      1. Replace every ASCII punctuation character listed in the pattern
         below with a single space (the backslash itself is not replaced).
      2. Lowercase the result.
      3. Split on whitespace, drop English stopwords from the NLTK corpus,
         and re-join the remaining words with single spaces.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string; empty if nothing survives the filtering.
    """
    # Raw string so the regex escapes are not parsed as (invalid) Python string
    # escapes, and the hyphen is escaped so it is a literal rather than an
    # accidental ",-." range. The matched character set is unchanged.
    text = re.sub(r'[!"#$%&\'()*+,\-./:;<=>?@\[\]^_`{|}~]', " ", text) # Punctuation
    text = text.lower()

    # A set gives constant-time membership checks instead of scanning the
    # stopword list once per token.
    stop = set(stopwords.words('english'))
    return ' '.join(word for word in text.split() if word not in stop) # Stopwords


def predict_tag(title, body):
  """Predict the tag for a single question from its title and body.

  The title and body are cleaned with ``process_text`` and each is passed
  through its own module-level transformer (``matrix_body`` /
  ``matrix_title``). The resulting feature columns are fed through the
  module-level ``tfidf`` transformer and then ``model``; the label with the
  highest score in the first prediction row is returned.

  Args:
    title: Question title text.
    body: Question body text.

  Returns:
    One of the labels in the tag list defined inside this function.
  """
  question = pd.DataFrame({"title": title, "body": body}, index=[0])
  question['body_proc'] = question['body'].apply(process_text)
  question['title_proc'] = question['title'].apply(process_text)

  # Body features first, then title features: the downstream transformer and
  # model receive the columns in exactly this order.
  body_feats = pd.DataFrame(
      matrix_body.transform(question["body_proc"]).toarray()
  ).add_prefix("X_body_")
  title_feats = pd.DataFrame(
      matrix_title.transform(question["title_proc"]).toarray()
  ).add_prefix("X_title_")

  combined = pd.concat([question, body_feats, title_feats], axis=1)

  # Keep only the generated feature columns (labels containing "X_").
  X_tfidf = tfidf.transform(combined.filter(like="X_")).toarray()

  # Label order must line up with the model's output columns.
  y_cols = ["c", "c#", "c++", "html", "java", "javascript", "php", "python", "r", "sql"]
  scores = pd.DataFrame(model.predict(X_tfidf), columns=y_cols)
  first_row = scores.iloc[0]

  return scores.columns[first_row.argmax()]


def assign_expert(tag):
  """Pick an expert for ``tag`` and record the assignment on the roster.

  The module-level ``experts`` frame is ranked by the ``tag`` column
  (highest first), with ties broken by fewest ``questions`` and then lowest
  ``total``. The top-ranked row has its ``questions`` count incremented, and
  the roster is put back into ``id`` order before returning.

  Args:
    tag: Name of a column in ``experts`` to rank by.

  Returns:
    The ``name`` value of the selected expert.
  """
  global experts

  ranking_keys = [tag, "questions", "total"]
  experts = experts.sort_values(by=ranking_keys, ascending=[False, True, True])

  # Row 0 is now the best match; bump its workload counter, then read its name.
  top_row = 0
  questions_pos = experts.columns.get_loc('questions')
  experts.iloc[top_row, questions_pos] += 1
  name_pos = experts.columns.get_loc('name')
  chosen = experts.iloc[top_row, name_pos]

  # Restore the original display order for later cells.
  experts = experts.sort_values(by="id")

  return chosen


def model_demo():
  """Interactively demo the classifier.

  Prompts for a question title and body, prints three separator lines, then
  prints the tag returned by ``predict_tag``.
  """
  title, body = input("Title: "), input("Body: ")

  for _ in range(3):
    print("---")

  print(f"Predicted Tag: {predict_tag(title, body)}")


def assignment_demo(tag):
  """Demo expert routing for an already-known tag.

  Calls ``assign_expert`` first, then prints the tag, a separator line and
  the name of the expert the question was routed to.

  Args:
    tag: Tag passed straight through to ``assign_expert``.
  """
  routed_to = assign_expert(tag)
  report = (
      f"Question Tag: {tag}",
      "---",
      f"Question Routed to {routed_to}",
  )
  for line in report:
    print(line)


def demo():
  """Run the full pipeline interactively: classify a question, then route it.

  Prompts for a title and body, prints the tag from ``predict_tag``, passes
  that tag to ``assign_expert`` and prints the selected expert. A separator
  line is printed before each result.
  """
  separator = "---"
  title, body = input("Title: "), input("Body: ")

  print(separator)
  tag = predict_tag(title, body)
  print(f"Predicted Tag: {tag}")

  # The prediction is printed before routing, so it is visible even if the
  # assignment step fails.
  expert = assign_expert(tag)
  print(separator)
  print(f"Question Routed to {expert}")

# Prepare Environment

In [49]:
# Bind the trained model and the three fitted transformers to the module-level
# names that predict_tag() reads as globals.
model, matrix_title, matrix_body, tfidf = load_objects()
# Expert roster: assign_expert() reads this frame and rebinds it through
# `global experts`, so re-run this cell to reset the `questions` counters.
# NOTE(review): the displayed frame carries an "Unnamed: 0" column, which
# suggests the CSV was saved with its index; consider index_col=0 after
# confirming nothing depends on that column.
experts = pd.read_csv(f"{filepath}experts.csv", header=0)

In [50]:
experts

Unnamed: 0,id,name,java,c++,c,c#,r,html,javascript,php,python,sql,total,questions
0,0,Bob,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,1.5,0
1,1,Simone,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.5,1.75,0
2,2,Boris,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0
3,3,Devin,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0
4,4,Sanjay,0.166667,1.0,0.5,0.0,0.0,0.0,0.0,0.0,0.25,0.0,1.916667,0
5,5,Casey,0.0,0.5,0.25,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.75,0
6,6,Jeff,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0
7,7,Erica,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0
8,8,Dmitri,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.25,1.0,1.75,0
9,9,Rose,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.5,0


# Model Demo

In [37]:
model_demo()

Title: cast Timespan.Ticks to varchar
Body: <p>I want to cast Timespan.Tick stored as Bigint to format '00 d, 00 h, 00 m' as varchar.</p>
---
---
---
Predicted Tag: sql


# Question Assignment Demo

In [46]:
assignment_demo("c++")

Question Tag: c++
---
Question Routed to Sanjay


In [47]:
experts

Unnamed: 0,id,name,java,c++,c,c#,r,html,javascript,php,python,sql,total,questions
0,0,Bob,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,1.5,0
1,1,Simone,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.5,1.75,0
2,2,Boris,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0
3,3,Devin,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0
4,4,Sanjay,0.166667,1.0,0.5,0.0,0.0,0.0,0.0,0.0,0.25,0.0,1.916667,1
5,5,Casey,0.0,0.5,0.25,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.75,0
6,6,Jeff,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0
7,7,Erica,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0
8,8,Dmitri,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.25,1.0,1.75,0
9,9,Rose,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.5,1


# Full Demo

In [55]:
demo()

Title: If I open this URL in one of the popular browsers, will I probably get a HTTPS error?
Body: <p>I need to write a Python 3 script that answers the question in the title.</p>
---
Predicted Tag: python
---
Question Routed to Rose


In [54]:
experts

Unnamed: 0,id,name,java,c++,c,c#,r,html,javascript,php,python,sql,total,questions
0,0,Bob,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,1.5,0
1,1,Simone,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.5,1.75,0
2,2,Boris,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0
3,3,Devin,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0
4,4,Sanjay,0.166667,1.0,0.5,0.0,0.0,0.0,0.0,0.0,0.25,0.0,1.916667,0
5,5,Casey,0.0,0.5,0.25,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.75,0
6,6,Jeff,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0
7,7,Erica,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0
8,8,Dmitri,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.25,1.0,1.75,0
9,9,Rose,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.5,0
