# Install required packages


In [None]:
import os
import mysql.connector
import openai
from bs4 import BeautifulSoup
from datetime import datetime
from dotenv import load_dotenv
from transformers import GPT2Tokenizer

# Load OpenAI API key from .env file
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")


def remove_html_markup(text):
    """Return the text content of *text* with its HTML markup removed.

    The input is parsed with BeautifulSoup's ``html.parser`` backend and the
    result of ``get_text()`` on the parsed document is returned.
    """
    return BeautifulSoup(text, "html.parser").get_text()

# Lazily-created tokenizer shared by every truncate_text() call; loading it
# is expensive, so it is done at most once per session.
_GPT2_TOKENIZER = None


def _get_gpt2_tokenizer():
    """Return the shared GPT-2 tokenizer, loading it on first use."""
    global _GPT2_TOKENIZER
    if _GPT2_TOKENIZER is None:
        _GPT2_TOKENIZER = GPT2Tokenizer.from_pretrained("gpt2")
    return _GPT2_TOKENIZER


def truncate_text(text, max_tokens=3750):
    """Truncate *text* to at most *max_tokens* GPT-2 tokens.

    The text is encoded with the GPT-2 tokenizer, cut off after
    ``max_tokens`` tokens, and decoded back to a string.

    Args:
        text: The string to shorten.
        max_tokens: Maximum number of GPT-2 tokens to keep.

    Returns:
        The decoded text of the (possibly truncated) token sequence.
    """
    # NOTE(review): the limit is measured in GPT-2 tokens, which may not match
    # the tokenization of the chat model the text is later sent to -- confirm
    # the default leaves enough headroom.
    tokenizer = _get_gpt2_tokenizer()
    # Encode to a plain list of ids; no tensor framework is needed just to
    # truncate and decode.
    token_ids = tokenizer.encode(text, max_length=max_tokens, truncation=True)
    return tokenizer.decode(token_ids, skip_special_tokens=True)

def _ask_gpt4(messages, prompt):
    """Append *prompt* as a user message, query GPT-4, and return its reply.

    The assistant's reply is also appended to *messages* so that follow-up
    prompts are sent as part of a proper question/answer conversation rather
    than as a growing pile of unanswered instructions.

    Args:
        messages: The running chat history (mutated in place).
        prompt: The user instruction to send next.

    Returns:
        The reply text with surrounding whitespace stripped.
    """
    messages.append({"role": "user", "content": prompt})
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
    )
    reply = response.choices[0].message['content']
    messages.append({"role": "assistant", "content": reply})
    return reply.strip()


def enrich_post(post_title, post_content):
    """Generate enrichment metadata for a post using GPT-4.

    The post content is stripped of HTML, prefixed with the title, truncated
    to the token budget of ``truncate_text``, and then used in a single chat
    conversation that asks, in order, for keywords, an article section, a
    short summary, an SEO description, and a sentiment. Each answer is printed
    as it arrives.

    Args:
        post_title: Title of the post.
        post_content: Body of the post; HTML markup is removed before use.

    Returns:
        A dict with the keys ``articlesection``, ``description``,
        ``keywords``, ``extracted_text`` (the truncated plain text that was
        sent to the model), ``summary`` and ``sentiment``.
    """
    articleSection = "Apex, AppExchange, Approval Processes, Automated Testing, Best Practices, Case Studies, Code Reviews, Coding Standards, Continuous Deployment, Continuous Integration, Data Management, Design Patterns, Developer Community, Salesforce DevOps, Events & Webinars, Generative AI, Industry News & Updates, Industry-Specific Solutions, Infrastructure as Code, Integration, Lessons Learned, Lightning Web Components, Monitoring & Logging, New Features, Performance Optimization, Process Automation, Reports & Dashboards, Salesforce Administration, Salesforce Development, Salesforce Releases, Cybersecurity, SOQL & SOSL, Step-by-Step Guides, Success Stories, Tips & Tricks, Troubleshooting, Tutorials & How-Tos, Version Control, Visualforce, Workflow Rules"
    print (f"Enriching post {post_title}")
    clean_content = remove_html_markup(post_content)
    truncated_content = truncate_text(post_title+' '+clean_content)

    # The conversation starts with only the system prompt; every request below
    # adds its user prompt and the model's answer to this shared history.
    messages = [
        {"role": "system", "content": "You are a helpful assistant that can extract keywords, summarize text, and determine sentiment."},
    ]

    # Extract keywords (this first prompt also carries the post text that the
    # later prompts refer to as "the previous text").
    keywords = _ask_gpt4(
        messages,
        f"Extract 5 keywords in a comma-separated list without numbers from the following text: {truncated_content}",
    )
    print (f"Keywords: {keywords}")

    # Create articleSection
    articlesection = _ask_gpt4(
        messages,
        f"Pick only one 'articleSection' phrase that best describes the previous text from this comma separated list. Only repeat back the phrase exactly as listed: {articleSection}",
    )
    print (f"articleSection: {articlesection}")

    # Create summary
    summary = _ask_gpt4(
        messages,
        "Write a summary of the previous text in less than 25 words or 280 characters.",
    )
    print (f"Summary: {summary}")

    # Create SEO description
    description = _ask_gpt4(
        messages,
        "Write an SEO-friendly summary of the previous text in less than 15 words or 160 characters",
    )
    print (f"SEO Description: {description}")

    # Determine sentiment
    sentiment = _ask_gpt4(
        messages,
        "Determine the sentiment of the previous text in one or two words",
    )
    print (f"Sentiment: {sentiment}")

    return {
        "articlesection": articlesection,
        "description": description,
        "keywords": keywords,
        "extracted_text": truncated_content,
        "summary": summary,
        "sentiment": sentiment,
    }

## Get published posts and enrich any that are not yet enriched

In [None]:
# Set up database connection (credentials come from environment variables)
db = mysql.connector.connect(
    host=os.getenv("WORK_WP_DBMS_HOST"),
    user=os.getenv("WORK_WP_DBMS_USER"),
    password=os.getenv("WORK_WP_DBMS_PASSWORD"),
    database=os.getenv("WORK_WP_DBMS_DATABASE")
)
# try/finally so the cursor and connection are released even if an API call or
# an INSERT fails part-way through the loop.
try:
    cursor = db.cursor()
    try:
        # Newest published posts first.
        cursor.execute("SELECT ID, post_title, post_content FROM wp_posts WHERE post_type = 'post' AND post_status = 'publish' ORDER BY post_date desc")
        posts = cursor.fetchall()
        for post_id, post_title, post_content in posts:
            # Existence check only: fetch a single constant instead of the
            # whole row, and LIMIT 1 so no unread rows are left on the cursor
            # before the next statement is executed.
            cursor.execute("SELECT 1 FROM post_enrichments WHERE post_id = %s LIMIT 1;", (post_id,))
            existing_summary = cursor.fetchone()

            if existing_summary:
                print(f"Post {post_id} already enriched, skipping.")
                continue

            enriched_data = enrich_post(post_title, post_content)
            cursor.execute("""
            INSERT INTO post_enrichments (post_id, enrichment_datetime, articlesection, description, keywords, extracted_text, summary, sentiment)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s);
            """, (post_id, datetime.now(), enriched_data['articlesection'], enriched_data['description'], enriched_data['keywords'], enriched_data['extracted_text'],
                  enriched_data['summary'], enriched_data['sentiment']))
            # Commit per post so completed enrichments survive a later failure.
            db.commit()
            print(f"Post {post_id} enriched and saved.")
    finally:
        cursor.close()
finally:
    db.close()

