<a href="https://colab.research.google.com/github/worldwidekatie/BW_4/blob/master/Subreddit_MVP_Instructions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Post Here MVP Algorithm Instructions

*Ignore these first two cells because you will already have the nearest-neighbors model and the TF-IDF vectorizer in your repo. If you need the nearest-neighbors model, download it from [here](https://github.com/worldwidekatie/BW_4/blob/master/nn_cleaned.joblib). If you need the TfidfVectorizer, download it from [here](https://github.com/worldwidekatie/BW_4/blob/master/tfidf_cleaned.joblib).*

In [1]:
# Colab only: pick nn_cleaned.joblib (the nearest-neighbors model) from your machine.
from google.colab import files
# Trailing semicolon keeps the returned {filename: bytes} dict out of the cell output.
files.upload();

Saving nn_cleaned.joblib to nn_cleaned.joblib


In [None]:
# Colab only: second upload — presumably tfidf_cleaned.joblib, which the
# "Load your assets" cell reads (this cell has no recorded run to confirm it).
from google.colab import files
# Trailing semicolon keeps the returned {filename: bytes} dict out of the cell output.
files.upload();

## Dependencies
Make sure you have the following in your Pipfile:

* joblib==0.16.0
* scikit-learn==0.22.2.post1
* pandas
* plotly


In [152]:
# Make sure you have all of these in your .py file
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from joblib import load
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

In [6]:
# Load the fitted nearest-neighbors model and the fitted TF-IDF vectorizer.
# NOTE: joblib files are pickle-based, so only load files from a source you trust.
model, tfidf = map(load, ('nn_cleaned.joblib', 'tfidf_cleaned.joblib'))

In [179]:
# Read the cleaned posts table; its subreddit column is what turns the
# row numbers returned by the model into subreddit names.
df = pd.read_csv(
    "https://raw.githubusercontent.com/worldwidekatie/BW_4/master/cleaned_subs.csv"
)
subreddits = df.loc[:, 'subreddit']

## Predict Route Function

In [180]:
def predict(title, body, n=5):
  """Suggest the subreddits whose posts are closest to a draft post.

  The title and body are vectorized together with the module-level `tfidf`
  and looked up in the module-level nearest-neighbors `model`; every
  neighbor row number is mapped to a name through `subreddits`.

  Args:
    title: Post title text.
    body: Post body text.
    n: Maximum number of subreddits to return (defaults to 5).

  Returns:
    A list of distinct subreddit names, nearest first. It can hold fewer
    than `n` names when several neighbors belong to the same subreddit.
  """
  # Join with a space so the last word of the title and the first word of
  # the body stay separate tokens ("chew" + "I", not "chewI").
  query = tfidf.transform([' '.join((title, body))])
  # toarray() yields a plain ndarray; todense() returns np.matrix, which
  # newer scikit-learn releases refuse as estimator input.
  _, indices = model.kneighbors(query.toarray())

  predictions = []
  for i in indices[0]:
    name = subreddits[i]
    # Several neighbors can come from one subreddit; keep its first (nearest) hit.
    if name not in predictions:
      predictions.append(name)

  return predictions[:n]

In [187]:
# Example: subreddit suggestions for a sample post
title = "New puppy likes to chew"
body = "I got this new puppy and I can't get it to stop chewing my shoes. What do I do?"

predict(title=title, body=body)

['puppy101', 'whatsthatbook', 'AskVet', 'ferrets', 'LDESurvival']

## Links Route Function

In [185]:
def links(title, body):
  """Return reddit URLs for the subreddits suggested for a draft post.

  Delegates to `predict` so the two routes always rank subreddits the same
  way instead of each carrying its own copy of the lookup.

  Args:
    title: Post title text.
    body: Post body text.

  Returns:
    A list of 'https://www.reddit.com/r/<subreddit>/' strings, one per
    subreddit returned by `predict`, in the same order.
  """
  return [f'https://www.reddit.com/r/{name}/' for name in predict(title, body)]

In [188]:
# Example: subreddit links for a sample post
title = "New puppy likes to chew"
body = "I got this new puppy and I can't get it to stop chewing my shoes. What do I do?"

links(title=title, body=body)

['https://www.reddit.com/r/puppy101/',
 'https://www.reddit.com/r/whatsthatbook/',
 'https://www.reddit.com/r/AskVet/',
 'https://www.reddit.com/r/ferrets/',
 'https://www.reddit.com/r/LDESurvival/']

## Visualization Route

In [195]:
def viz(title, body):
  """Build a scatter plot of the closest subreddits for a draft post.

  The title and body are vectorized with the module-level `tfidf` and
  looked up in the module-level nearest-neighbors `model`. Up to six
  distinct subreddits are plotted with the neighbor distance on the x axis
  (smaller distance = more relevant); marker color and size both follow
  that distance.

  Args:
    title: Post title text.
    body: Post body text.

  Returns:
    The plotly figure serialized as a JSON string.
  """
  # Join with a space so the last word of the title and the first word of
  # the body stay separate tokens ("chew" + "I", not "chewI").
  query = tfidf.transform([' '.join((title, body))])
  # toarray() yields a plain ndarray; todense() returns np.matrix, which
  # newer scikit-learn releases refuse as estimator input.
  distances, indices = model.kneighbors(query.toarray())

  # Keep the first (nearest) hit of every distinct subreddit.
  seen = set()
  hits = []
  for idx, dist in zip(indices[0], distances[0]):
    name = subreddits[idx]
    if name not in seen:
      seen.add(name)
      hits.append((name, dist))

  # Truncate and reverse name, distance and size together so each marker
  # keeps the size that belongs to its own point.
  hits = hits[:6]
  hits.reverse()
  predictions = [name for name, _ in hits]
  values = [dist for _, dist in hits]
  size = [(dist + 1) * 10 for dist in values]

  fig = go.Figure(data=[go.Scatter(
    x=values, y=predictions,
    mode='markers',
    marker=dict(
        color=values,
        size=size
    )
    )])

  fig.update_layout(
      title="Subreddit Relevance",
      xaxis_title="<==More Relevant | Less Relevant==>",
      font=dict(
          family="Courier New, monospace",
          size=18,
          color="RebeccaPurple"
      )
  )

  # Using to_json so it's ready to be sent as a JSON string
  return fig.to_json()

In [197]:
# viz() returns the figure as a JSON string (ready for the API response),
# so rebuild a Figure from that JSON to display it in the notebook.
title = "New puppy likes to chew"
body = "I got this new puppy and I can't get it to stop chewing my shoes. What do I do?"

figure = pio.from_json(viz(title, body))

figure.show()