In [2]:
# filename: utils/db_connection.py

import sqlite3
import pandas as pd
from utils.new_paths import DB_PATH
from IPython.display import display, HTML


def get_db_connection(row_factory=None):
    """Open a new SQLite connection to the database located at ``DB_PATH``.

    Args:
        row_factory: Optional row factory (for example ``sqlite3.Row``). When a
            truthy value is given it is assigned to the connection's
            ``row_factory`` attribute; otherwise the sqlite3 default is kept.

    Returns:
        sqlite3.Connection: an open connection. The caller is responsible for
        closing it.
    """
    connection = sqlite3.connect(DB_PATH)
    if not row_factory:
        # Nothing to customise: hand back the connection as sqlite3 created it.
        return connection
    connection.row_factory = row_factory
    return connection

def get_posts_all():
    """Load every row of the ``posts`` table into a pandas DataFrame.

    Opens a connection with ``get_db_connection()``, runs
    ``SELECT * FROM posts``, prints the list of column names, and returns the
    resulting frame.

    Returns:
        pandas.DataFrame: the result of the query.

    Raises:
        Whatever ``pd.read_sql_query`` raises when the query fails; the
        connection is closed before the exception propagates.
    """
    conn = get_db_connection()
    try:
        query = "SELECT * FROM posts"
        df = pd.read_sql_query(query, conn)
    finally:
        # Release the connection even when the query fails (e.g. missing table),
        # otherwise a failed cell leaves the handle open for the kernel's lifetime.
        conn.close()
    print(df.columns.tolist())
    return df

# Make permalink clickable and render table
def render_table(df, title):
    """Display selected post columns as a scrollable HTML table in the notebook.

    The table shows ``post_id``, ``title``, ``selftext``, ``post_type``,
    ``post_length`` and ``permalink``; all six columns must exist in ``df``.
    The input frame is not modified.

    Args:
        df: DataFrame of posts containing the six columns listed above.
        title: Heading placed above the table. It is inserted into the markup
            as-is, so pass trusted text only.

    Rendering details:
        * ``selftext`` is cut to 300 characters; ``'...'`` is appended only when
          something was actually cut off.
        * Missing ``selftext`` / ``title`` / ``permalink`` values render as
          empty cells.
        * Post-supplied text is HTML-escaped because the table is emitted with
          ``escape=False`` (needed so the permalink anchor survives).
    """
    # Local import: keeps the block self-contained and avoids a module-level
    # name `html`.
    from html import escape

    preview_chars = 300

    def _safe_text(value):
        # Missing values become empty cells; everything else is escaped.
        return '' if pd.isna(value) else escape(str(value))

    def _preview(value):
        if pd.isna(value):
            return ''
        text = str(value)
        if len(text) > preview_chars:
            text = text[:preview_chars] + '...'
        # Escape after truncating so an entity is never cut in half.
        return escape(text)

    def _link(value):
        if pd.isna(value):
            return ''
        path = escape(str(value))
        return f'<a href="https://www.reddit.com{path}" target="_blank">{path}</a>'

    columns = ['post_id', 'title', 'selftext', 'post_type', 'post_length', 'permalink']
    table = df[columns].copy()
    for text_column in ('post_id', 'title', 'post_type'):
        table[text_column] = table[text_column].map(_safe_text)
    table['selftext'] = table['selftext'].map(_preview)
    table['permalink'] = table['permalink'].map(_link)

    table_html = table.to_html(index=False, escape=False)
    display(HTML(f"""
        <h4>{title}</h4>
        <div style="max-height:500px; overflow:auto; border:1px solid #ccc; padding:10px; font-family:monospace; font-size:12px">
        {table_html}
        </div>
    """))

ModuleNotFoundError: No module named 'utils'

In [14]:
# Load every post, then add derived columns:
#   post_text   - title and selftext joined by a space, stripped and lowercased
#                 (missing values are treated as empty strings)
#   post_length - number of characters in post_text
df = get_posts_all().assign(
    post_text=lambda posts: (
        posts['title'].fillna('') + ' ' + posts['selftext'].fillna('')
    ).str.strip().str.lower(),
    post_length=lambda posts: posts['post_text'].str.len(),
)



['post_id', 'subreddit_id', 'username', 'title', 'selftext', 'created_utc', 'edited_utc', 'score', 'upvote_ratio', 'is_promotional', 'is_removed', 'is_deleted', 'flair', 'post_type', 'num_comments', 'url', 'permalink', 'extra_metadata', 'captured_at']


In [13]:

# The 50 rows with the smallest post_length, rendered as an HTML table.
shortest_posts = df.nsmallest(n=50, columns='post_length')
render_table(shortest_posts, title="Shortest post_text entries")

post_id,title,selftext,post_type,post_length,permalink
1ktoot9,😲,...,link,1,/r/WGUCyberSecurity/comments/1ktoot9/_/
1kvvg7u,RIP,...,link,3,/r/WGU/comments/1kvvg7u/rip/
jrcbsb,ITIL,...,link,4,/r/WGU_CloudComputing/comments/jrcbsb/itil/
1j2yahs,D487,...,link,4,/r/WGUCyberSecurity/comments/1j2yahs/d487/
188t44r,VR&E,...,link,4,/r/WGU_Military/comments/188t44r/vre/
188t3a4,VR&E,...,link,4,/r/WGU_Military/comments/188t3a4/vre/
1lcuylx,D664,...,link,4,/r/WGU/comments/1lcuylx/d664/
1lwu1pi,D689,...,link,4,/r/WGUTeachersCollege/comments/1lwu1pi/d689/
uzfw41,edTPA,...,link,5,/r/WGUTeachersCollege/comments/uzfw41/edtpa/
1ihpzkz,DONE.,...,image,5,/r/WGU_MSDA/comments/1ihpzkz/done/


In [15]:
# The 50 rows with the largest post_length, rendered as an HTML table.
longest_posts = df.nlargest(n=50, columns='post_length')
render_table(longest_posts, title="Longest post_text entries")

post_id,title,selftext,post_type,post_length,permalink
1efzjo6,Tips for Accelerating Your MBA (or Not!) - Course Tips & Recommended Resources,"Let me start by saying this. I was what my mentor referred to as a ""super-accelerator."" I won't get into the exact timeline because I know that can rile some people up here and discourage others, and that's not my intention, but it was fast. I gave myself 4 months, and I didn't need most of that tim...",text,34102,/r/WGU_MBA/comments/1efzjo6/tips_for_accelerating_your_mba_or_not_course_tips/
1bfgy9b,Network+ Acronyms w/descriptions courtesy of ChatGPT,"1.\tAAAA, Authentication, Authorization, Accounting, Auditing, Framework providing a holistic approach to network resource access, tracking user activities, and ensuring security.\n\n2.\tACL, Access Control List, A set of rules used to control network traffic and reduce network attacks by allowing or de...",text,31596,/r/WGUIT/comments/1bfgy9b/network_acronyms_wdescriptions_courtesy_of_chatgpt/
193jc6p,CompTIA A+ 1101 Acronyms and descriptions,"1.\tAC: Alternating Current\n\n2.\tACL: Access Control List - Rules for packet filtering based on control access.\n\n3.\tADF: Automatic Document Feeder - A feature in printers, scanners, or fax machines to feed several pages at once.\n\n4.\tAES: Advanced Encryption Standard - A symmetric encryption algori...",text,30910,/r/WGUIT/comments/193jc6p/comptia_a_1101_acronyms_and_descriptions/
1eh7602,Finished as someone with NO prior experience. Review of all classes.,"There are others that have made this post, but I think it would be helpful if people gave context to who they are and their level of proficiencies so that others can more accurately predict how the experience will go for them.\n\n \n**Who I am:**\n\n* Early 30s male\n* Wife and kid (toddler)\n* Working f...",text,29836,/r/WGU_CompSci/comments/1eh7602/finished_as_someone_with_no_prior_experience/
1jfvmzu,Introduction to Computer Science - D684 | study materials/questions,"# OPERATING SYSTEMS (Q1–Q30)\n\n# Q1: What is an operating system (OS), and what are three major responsibilities it fulfills?\n\n**A1**:\n\n* An **OS** is the main software layer that manages computer hardware and provides services for applications.\n* It **allocates resources** (CPU, memory, I/O), **mana...",text,26791,/r/WGU_CompSci/comments/1jfvmzu/introduction_to_computer_science_d684_study/
1g2bi3w,D288 - Back-End Programming 2024 Guide!,"Hello fellow night owls!\n\nHope you're having a fantastic semester. I finished this course about two months ago, and since I have time, I want to write up an updated guide for D288 that consolidates all older resources, and includes new tips/explanations. This guide is mostly an extension of this gui...",text,24014,/r/WGU_CompSci/comments/1g2bi3w/d288_backend_programming_2024_guide/
1hwp9h9,How I Graduated WGU in Just Over 3 Months (few transfer credits),[My homescreen](https://preview.redd.it/e7uqngr0dsbe1.png?width=961&format=png&auto=webp&s=1497034d9ccacd9f0174482a8595447646266ffa)\n\n[Month 3 Results](https://preview.redd.it/agz2l2y3dsbe1.png?width=1340&format=png&auto=webp&s=6c3b48b7677db83bc1678064451de0e402f28825)\n\nI delayed this post to finish...,text,23670,/r/wguaccounting/comments/1hwp9h9/how_i_graduated_wgu_in_just_over_3_months_few/
gouuw0,MBA (Master of Business Administration) Course MEGA Thread (Also relevant to some MSML classes),"This is a guide to how I did my **MBA** at WGU (in **19 Days)**\n\n**I moved my post over from the other subreddit, as I wanted to specifically help people who are looking for information related to the Business degree, specifically, the MBA.**\n\nThis is a full, per course, write up of how I experi...",text,22360,/r/WGU_Business/comments/gouuw0/mba_master_of_business_administration_course_mega/
pgkti1,MSML (Master of Science in Management and Leadership) Course MEGA Thread,"​\n\n[MSML Degree, Done.](https://preview.redd.it/m31z51r554l71.png?width=1080&format=png&auto=webp&s=133438cb5f9f7e5ba0ec471a678818b0ded813d5)\n\n​\n\nMSML (Completed in 12 days)\n\nSo I just completed the MSML. Took me just 12 days. This writeup focuses on the 6 more classes needed for the M...",text,20208,/r/WGU_Business/comments/pgkti1/msml_master_of_science_in_management_and/
1f7cftv,Ultimate Guide to C971 - Mobile App Development Using C#,"Ultimate Guide to C971 - From Start to Finish\n\nHello everyone! I've benefitted greatly from the WGU reddit over the last 15 months. Quite frankly, it's the only reason I still use reddit.\n\nNow I'd like to give back.\n\nBelow are the exact steps I took from beginning to end in Mobile App Development us...",text,19282,/r/wgu_devs/comments/1f7cftv/ultimate_guide_to_c971_mobile_app_development/


In [1]:
# === Project Bootstrapping ===
from pathlib import Path
import sys

# Locate and add project root so 'utils' is importable.
# Search the current working directory and then each of its ancestors for
# utils/db_connection_new.py. The ancestor list is finite, so a missing marker
# ends in a clear error; an open-ended `while` would never stop, because the
# parent of the filesystem root is the root itself.
_search_start = Path().resolve()
for ROOT_DIR in (_search_start, *_search_start.parents):
    if (ROOT_DIR / "utils" / "db_connection_new.py").exists():
        break
else:
    raise FileNotFoundError(
        "Could not find utils/db_connection_new.py in "
        f"{_search_start} or any of its parent directories"
    )
# Re-running the cell in the same kernel must not stack duplicate entries.
if str(ROOT_DIR) not in sys.path:
    sys.path.append(str(ROOT_DIR))

# Now imports will work
from utils.db_connection_new import load_posts_dataframe
from utils.new_paths import DATA_DIR, OUTPUT_DIR

# Input / Output files: the course CSV under DATA_DIR and the CSV path under
# OUTPUT_DIR that the "Output will save to" message reports.
input_file, output_file = (
    DATA_DIR / "courses_with_college_v10.csv",
    OUTPUT_DIR / "manual_help_truth.csv",
)

# Confirm paths before doing any work
print("✅ Input file exists:", input_file.exists())
print("📍 Output will save to:", output_file)

# Load the posts and report what came back
df = load_posts_dataframe()

print(f"Loaded {df.shape[0]} rows")
print("Columns:", list(df.columns))
display(df.iloc[:3])  # optional if in notebook

✅ Input file exists: True
📍 Output will save to: /Users/buddy/Desktop/WGU-Reddit/data/output/manual_help_truth.csv
Loaded 19001 rows
Columns: ['post_id', 'subreddit_id', 'title', 'selftext', 'created_utc', 'score', 'num_comments', 'permalink', 'subreddit_name']


Unnamed: 0,post_id,subreddit_id,title,selftext,created_utc,score,num_comments,permalink,subreddit_name
0,1k6jeqd,2se63,Examity,I’m curious as to how examity works. I read on...,1745468279,1,1.0,/r/WGU/comments/1k6jeqd/examity/,WGU
1,1k6j88n,2se63,Any Canadians here pursuing software developme...,I’m considering getting a software development...,1745467640,1,0.0,/r/WGU/comments/1k6j88n/any_canadians_here_pur...,WGU
2,1k6iufu,2se63,ANYONE IN D277,I’m half way through Front End Web Development...,1745466359,1,0.0,/r/WGU/comments/1k6iufu/anyone_in_d277/,WGU
