In [2]:
# Install Detoxify into the kernel's environment. The explicit %pip magic does
# not depend on IPython automagic, -q keeps the install log out of the notebook,
# and the pin matches the version this notebook was originally run against.
%pip install -q detoxify==0.5.2

Collecting detoxify
  Downloading detoxify-0.5.2-py3-none-any.whl.metadata (13 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.7.0->detoxify)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.7.0->detoxify)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.7.0->detoxify)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.7.0->detoxify)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.7.0->detoxify)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.7.0->detoxify)
  

In [3]:
from detoxify import Detoxify

In [7]:
# Install the similarity / embedding dependencies into the kernel's environment.
# Explicit %pip avoids relying on automagic; -q suppresses the download log.
# TODO: pin versions once the known-good set for this notebook is recorded.
%pip install -q scikit-learn transformers sentence-transformers



In [8]:
import pandas as pd

In [9]:
# Load the feed dataset into a DataFrame. The path is an absolute /content/
# location, so the CSV must already exist there before this cell is run.
# NOTE: the scoring cell further down reads the same file again into `df`.
df = pd.read_csv('/content/slatemate_interest_feed_dataset.csv')

In [11]:
from sentence_transformers import SentenceTransformer

In [16]:
# Shared sentence-embedding model, created at module level so get_embedding
# reuses one instance instead of constructing a model on every call.
model = SentenceTransformer('all-MiniLM-L6-v2')


def get_embedding(text):
    """Encode ``text`` with the shared sentence-transformer ``model``.

    The call passes ``normalize_embeddings=True`` to ``model.encode``, i.e. the
    embedding is requested in normalised form.

    Args:
        text: Input forwarded unchanged to ``model.encode``.

    Returns:
        The value produced by ``model.encode`` for ``text``.
    """
    embedding = model.encode(text, normalize_embeddings=True)
    return embedding

In [17]:
def get_combined_text(row):
    """Join a row's ``title`` and ``text`` fields into one space-separated string.

    Missing values are skipped instead of being stringified: a field that is
    ``None`` or a float NaN would otherwise end up as the literal word "None" or
    "nan" in the text that is later embedded. When both fields are present the
    result is exactly ``str(title) + " " + str(text)``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row or dict) that supports
            ``row['title']`` and ``row['text']``.

    Returns:
        str: The present fields joined by a single space; ``""`` if both are
        missing.

    Raises:
        KeyError: If ``row`` has no ``'title'`` or ``'text'`` key.
    """
    parts = []
    for column in ('title', 'text'):
        value = row[column]
        # NaN is the only float that is not equal to itself; this check needs
        # no extra imports and also matches numpy float NaNs.
        is_missing = value is None or (isinstance(value, float) and value != value)
        if not is_missing:
            parts.append(str(value))
    return " ".join(parts)

In [31]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def compute_similarity(interest, embedding):
    """Cosine similarity between an interest and an existing embedding.

    The interest is embedded with ``get_embedding`` on every call and compared
    against ``embedding`` using scikit-learn's ``cosine_similarity``.

    Args:
        interest: Value passed to ``get_embedding`` to obtain the query vector.
        embedding: Vector to compare against; presumably produced by
            ``get_embedding`` as well (verify against the caller).

    Returns:
        float: The single similarity value, converted to a Python float.
    """
    query_vector = get_embedding(interest)
    # cosine_similarity works on 2-D inputs, so each vector is wrapped in a
    # one-row list and the lone cell of the 1x1 result is extracted.
    score_matrix = cosine_similarity([query_vector], [embedding])
    return float(score_matrix[0][0])

In [21]:
# Toxicity classifier built from Detoxify's 'original' checkpoint; kept at
# module level so compute_toxicity does not rebuild it on each call.
detox_model = Detoxify('original')


def compute_toxicity(text):
    """Return the ``'toxicity'`` entry of Detoxify's prediction for ``text``.

    Args:
        text: Input forwarded unchanged to ``detox_model.predict``.

    Returns:
        The value stored under the ``'toxicity'`` key of the prediction result.
    """
    return detox_model.predict(text)['toxicity']

In [30]:
def compute_wellbeing(relevance, toxicity, alpha=0.7):
    """Blend relevance and non-toxicity into one score scaled by 100.

    The score is ``100 * (alpha * relevance + (1 - alpha) * (1 - toxicity))``:
    higher relevance raises it, higher toxicity lowers it. With ``relevance``,
    ``toxicity`` and ``alpha`` all in ``[0, 1]`` the result lies in ``[0, 100]``.

    Args:
        relevance: Relevance score of the content.
        toxicity: Toxicity score of the content.
        alpha: Weight given to relevance; ``1 - alpha`` goes to non-toxicity.

    Returns:
        The weighted score multiplied by 100.
    """
    relevance_part = alpha * relevance
    safety_part = (1 - alpha) * (1 - toxicity)
    return 100 * (relevance_part + safety_part)

In [23]:
def generate_safe_feed(user_interest, content_df, min_relevance=0.3,
                       max_toxicity=0.6, high_wellbeing=85, top_n=10):
    """Split scored content into recommendations and blocked items.

    Rows are checked in order: relevance first, then toxicity. A row that
    passes both checks is recommended and labelled by its wellbeing score.
    The thresholds used to be hard-coded; they are now keyword parameters whose
    defaults reproduce the previous behaviour exactly.

    Args:
        user_interest: Interest label echoed back under ``"detected_interest"``.
        content_df: DataFrame with the columns ``title``, ``source``,
            ``relevance_score``, ``toxicity_score`` and ``wellbeing_score``.
        min_relevance: Rows with ``relevance_score`` strictly below this value
            are blocked as low relevance.
        max_toxicity: Rows with ``toxicity_score`` strictly above this value
            are blocked as potentially unsafe.
        high_wellbeing: Rows with ``wellbeing_score`` strictly above this value
            get the "Highly relevant & safe" reason.
        top_n: Maximum number of recommendations returned; ``None`` returns all.

    Returns:
        dict with the keys ``"detected_interest"``, ``"top_recommendations"``
        (sorted by rounded wellbeing score, highest first, at most ``top_n``
        entries) and ``"blocked_content"`` (every blocked row with its reason,
        in input order).
    """
    recommended = []
    blocked = []

    for _, row in content_df.iterrows():
        # The relevance check runs first, so an irrelevant *and* toxic row is
        # reported as low relevance.
        if row['relevance_score'] < min_relevance:
            blocked.append({
                "title": row['title'],
                "reason": "Low relevance to interest"
            })
            continue

        if row['toxicity_score'] > max_toxicity:
            blocked.append({
                "title": row['title'],
                "reason": "Potentially unsafe content"
            })
            continue

        # The label is decided on the unrounded score; rounding is display-only.
        if row['wellbeing_score'] > high_wellbeing:
            reason = "Highly relevant & safe"
        else:
            reason = "Relevant & safe"

        recommended.append({
            "title": row['title'],
            "source": row['source'],
            "wellbeing_score": round(row['wellbeing_score'], 2),
            "reason": reason
        })

    # list.sort is stable, so ties keep their original row order.
    recommended.sort(key=lambda item: item['wellbeing_score'], reverse=True)

    return {
        "detected_interest": user_interest,
        "top_recommendations": recommended[:top_n],
        "blocked_content": blocked
    }

In [32]:
# Ask for the interest that every row of the dataset will be scored against.
user_interest = input("Enter your interest: ")

# Re-read the dataset so this cell starts from the raw CSV rather than from a
# frame that an earlier run may already have extended with score columns.
df = pd.read_csv("/content/slatemate_interest_feed_dataset.csv")
df['embedding'] = df.apply(lambda row: get_embedding(get_combined_text(row)), axis=1)

# Encode the interest ONCE. Scoring through compute_similarity(user_interest, x)
# would re-run the embedding model on the same string for every row.
interest_vec = get_embedding(user_interest)
df['relevance_score'] = df['embedding'].apply(
    lambda emb: float(cosine_similarity([interest_vec], [emb])[0][0]))

# Toxicity is computed on the body text only (not the title).
df['toxicity_score'] = df['text'].apply(compute_toxicity)
df['wellbeing_score'] = df.apply(
    lambda row: compute_wellbeing(row['relevance_score'], row['toxicity_score']), axis=1)

result = generate_safe_feed(user_interest, df)
print(result)


Enter your interest: chess
{'detected_interest': 'chess', 'top_recommendations': [{'title': 'Beginner Chess Tactics', 'source': 'NewsSite', 'wellbeing_score': 68.32, 'reason': 'Relevant & safe'}, {'title': 'Beginner Chess Tactics', 'source': 'Blog', 'wellbeing_score': 68.32, 'reason': 'Relevant & safe'}, {'title': 'Beginner Chess Tactics', 'source': 'Reddit', 'wellbeing_score': 68.32, 'reason': 'Relevant & safe'}, {'title': 'Beginner Chess Tactics', 'source': 'Wikipedia', 'wellbeing_score': 68.32, 'reason': 'Relevant & safe'}, {'title': 'Beginner Chess Tactics', 'source': 'YouTube', 'wellbeing_score': 68.32, 'reason': 'Relevant & safe'}, {'title': 'Beginner Chess Tactics', 'source': 'Reddit', 'wellbeing_score': 68.32, 'reason': 'Relevant & safe'}, {'title': 'Beginner Chess Tactics', 'source': 'YouTube', 'wellbeing_score': 68.32, 'reason': 'Relevant & safe'}, {'title': "Magnus Carlsen's Best Games", 'source': 'Twitter', 'wellbeing_score': 68.04, 'reason': 'Relevant & safe'}, {'title': "

Collecting streamlit
  Downloading streamlit-1.45.1-py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.45.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m48.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m72.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hInst