<a href="https://colab.research.google.com/github/xjdeng/mbtimodel/blob/main/reddit_gemini_query.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/xjdeng/mbtimodel
!pip install praw google-generativeai path.py==12.0.1
!pip install -r mbtimodel/requirements.txt

In [None]:
import joblib
import praw
from google.colab import userdata
import bs4
import markdown
import re
import pandas as pd
import praw
from prawcore.exceptions import NotFound
import string
import google.generativeai as genai
import enum
from typing_extensions import TypedDict
import pprint
from path import Path as path
from datetime import datetime

In [None]:

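# IMPORTANT: Set up both your Reddit and your Gemini credentials and store them in your Google Colab Secrets.
# Secret names read below: GOOGLE_API_KEY, reddit_client_id, reddit_client_secret, reddit_username, reddit_password, reddit_app
# Video instructions for Reddit: https://www.youtube.com/watch?v=VAJFZEeKjSY
# Video instructions for Gemini: https://www.youtube.com/watch?v=S1elvCs1gyI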

def _require_secret(name):
    """Return the Colab secret called ``name``, failing loudly if it is unset.

    Args:
        name: Key of the secret as stored in the Colab Secrets panel.

    Returns:
        The secret's value.

    Raises:
        ValueError: If the secret resolves to ``None`` or an empty string.
    """
    value = userdata.get(name)
    if not value:
        raise ValueError(
            f"Colab secret {name!r} is missing or empty; "
            "add it in the Colab Secrets panel and grant this notebook access."
        )
    return value


# Gemini credentials.
GOOGLE_API_KEY = _require_secret("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

# Reddit credentials.
client_id = _require_secret("reddit_client_id")
client_secret = _require_secret("reddit_client_secret")
username = _require_secret("reddit_username")
password = _require_secret("reddit_password")
# The app name is handed to PRAW as the user agent, so it is validated like
# every other credential instead of being passed through unchecked.
app_name = _require_secret("reddit_app")

reddit = praw.Reddit(
    client_id=client_id,
    client_secret=client_secret,
    user_agent=app_name,
    username=username,
    password=password,
)

# Gemini model used for every query in this notebook.
model = genai.GenerativeModel("gemini-1.5-flash-latest")

In [None]:
# Closed set of confidence levels allowed for an answer.
# Used as the type of the `confidence` field of the `answer` TypedDict,
# which is the response schema given to the model.
class Confidence(enum.Enum):
  low = "low"
  medium = "medium"
  high = "high"

# Shape of the structured JSON reply requested from the model; passed as
# `response_schema` in query_history.
class answer(TypedDict):
    answer: str  # the model's answer to the query
    confidence: Confidence  # how confident the model is in that answer
    explanation: str  # why the model chose that answer

def noquotes(text):
    """Convert raw Reddit markdown into plain text.

    This function started out as a way to remove markdown quotes from raw
    Reddit markdown text, but it is now more of a general-purpose text
    cleaner; the name simply hasn't changed.

    The markdown is rendered to HTML and every text node is extracted. Then
    anything from a ``>`` character through the end of its line is dropped,
    followed by literal backslash-n sequences and any remaining backslashes.

    Args:
        text: Markdown source, e.g. a comment body or a submission selftext.

    Returns:
        The cleaned plain-text string.
    """
    # https://stackoverflow.com/questions/761824/python-how-to-convert-markdown-formatted-text-to-text
    html = markdown.markdown(text)
    soup = bs4.BeautifulSoup(html, 'lxml')
    # find_all is the supported spelling; findAll is a deprecated alias.
    plain = ''.join(soup.find_all(string=True))
    # Raw string keeps the regex escapes out of Python's own escape handling.
    without_quotes = re.sub(r">.+?(\n|$)", "", plain)
    return without_quotes.replace("\\n", "").replace("\\", "")

def query_history(query, history):
    """Ask the Gemini model a question about a Reddit user's history.

    Args:
        query: The question to ask about the user.
        history: Text of the user's comments and submissions; the prompt
            tells the model to weigh more recent dates more heavily.

    Returns:
        The response object from ``model.generate_content``, requested as
        JSON that follows the ``answer`` schema.
    """
    prompt = f"""
    I want make the following query about a particular reddit user based on their comments and submissions in this Reddit history.
    Please pay close attention to the dates of the comments and submissions and pay more attention to the more recent dates.
    If something the user says conflicts with an earlier comment or submission they made, assume they've changed and the more recent one is more likely true now.

    Query:
    ---
    {query}
    ---

    Please answer to this query, state an explanation for why you chose your answer, and state your confidence level in your answer.

    Reddit History:
    ---
    {history}
    ---

    """
    # Constrain the reply to JSON matching the `answer` schema.
    json_config = genai.GenerationConfig(
        response_mime_type="application/json",
        response_schema=answer,
    )
    return model.generate_content(prompt, generation_config=json_config)

# Directory holding cached user histories, one text file per username.
path("tmp").mkdir_p()


def _utc_stamp(epoch_seconds):
    """Format a Unix timestamp as ``YYYY-MM-DD HH:MM:SS`` in UTC."""
    # Local import: the notebook only imports ``datetime`` from this module.
    from datetime import timezone

    # datetime.utcfromtimestamp is deprecated; the timezone-aware call below
    # produces the same formatted string.
    moment = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc)
    return moment.strftime('%Y-%m-%d %H:%M:%S')


def _download_history(username):
    """Fetch a redditor's comments and submissions as one dated text blob."""
    redditor = reddit.redditor(username)
    comments = list(redditor.comments.new(limit=None))
    submissions = list(redditor.submissions.new(limit=None))

    entries = [
        f"[Comment Date: {_utc_stamp(comment.created_utc)}]\n{noquotes(comment.body)}"
        for comment in comments
    ]
    for sub in submissions:
        body = noquotes(sub.selftext)
        # Submissions whose cleaned selftext is empty add nothing to the prompt.
        if body:
            entries.append(f"[Submission Date: {_utc_stamp(sub.created_utc)}]\n{body}")
    return "\n\n\n".join(entries)


def query_user(username, query):
    """Answer ``query`` about a Reddit user based on their posting history.

    The history is read from ``tmp/<username>.txt`` when that file exists;
    otherwise it is downloaded from Reddit and written there for reuse.

    Args:
        username: Reddit username whose history is queried.
        query: The question to ask about that user.

    Returns:
        The text of the first part of the first candidate in the model's
        response (requested as JSON by ``query_history``), or ``None`` when
        the Reddit fetch raises ``NotFound``.
    """
    userfile = f"tmp/{username}.txt"
    if path(userfile).exists():
        # Explicit UTF-8: Reddit text routinely contains non-ASCII characters.
        with open(userfile, encoding="utf-8") as f:
            fulltext = f.read()
    else:
        # Only the Reddit fetch can raise prawcore's NotFound, so keep the
        # try body limited to it.
        try:
            fulltext = _download_history(username)
        except NotFound:
            return None
        with open(userfile, 'w', encoding="utf-8") as f:
            f.write(fulltext)

    response = query_history(query, fulltext)
    return response.to_dict()['candidates'][0]['content']['parts'][0]['text']

In [None]:
# Demo: ask one question about one user and pretty-print whatever comes back
# (the model's reply text, or None when query_user returns None).
result = query_user(username="thisisbillgates", query="Does this user like bacon?")
print(pprint.pformat(result))