In [None]:
from dotenv import load_dotenv
load_dotenv()

from huggingface_hub import InferenceClient
import os
import numpy as np
import pandas as pd
from sqlalchemy import create_engine, types, text, Engine
import time

from src import database_methods as dbm
from src import ai_agent_methods as aiam


In [None]:
# Hugging Face inference client. The access token is read from the HF_TOKEN
# environment variable; a missing variable raises KeyError on this line.
client: InferenceClient = InferenceClient(
    api_key=os.environ["HF_TOKEN"],
)

# Smoke test: send a single trivial chat request so that token or model
# problems surface here rather than in the middle of the e-mail generation.
try:
    completion = client.chat.completions.create(
        model="HuggingFaceH4/zephyr-7b-beta:featherless-ai",
        messages=[
            {
                "role": "user",
                "content": "What is the capital of France?",
            }
        ],
    )

    print(f"Model answer (What is the capital of France?):\n{completion.choices[0].message.content}")
except Exception as e:
    # Best-effort check: keep the notebook running, but report the actual
    # error so the failure can be diagnosed.
    print(f"Model unsuccessfully loaded: {e}")

## Because the dataset contains no customer e-mail addresses, synthetic addresses are generated for this project by concatenating the prefix "customer_", the customer_id, and the suffix "@mail.com".

## These addresses are generated in the SQL query together with the other parameters the AI Agent needs to write e-mails both to customers who have been inactive for some time and to active customers; each e-mail offers a store discount that encourages further purchases.

In [None]:
# Create the database engine through the project helper. Without an engine
# the query below cannot run, so the error is reported and then re-raised
# instead of letting the cell fail later with an unrelated NameError.
try:
    engine: Engine = dbm.get_db_engine()
except Exception as e:
    print(f"DB Engine creation error: {e}")
    raise
else:
    print("DB Engine successfuly created")

# One row per customer_id with:
#   monetary            - SUM(quantity * price)
#   frequency           - number of distinct order_id values
#   recency             - latest date in the whole table minus the customer's latest date
#   average_order_value - monetary / frequency, rounded to 2 decimals
#   churn               - 1 when recency >= 90, otherwise 0
#                         (presumably days; depends on the column type - TODO confirm)
#   email               - synthetic address 'customer_<customer_id>@mail.com'
query: str = """
SELECT 
    customer_id, 
    SUM(quantity * price) AS monetary, 
    COUNT (DISTINCT order_id) AS frequency,
    ((SELECT MAX(date) FROM e_commerce_order_details) - MAX(date)) AS recency, 
    ROUND((SUM(quantity * price) / (COUNT (DISTINCT order_id))), 2) AS average_order_value, 
	CASE
		WHEN 
			((SELECT MAX(date) FROM e_commerce_order_details) - MAX(date)) >= 90
		THEN
			1
		ELSE
			0
	END AS churn, 
	CONCAT('customer_', customer_id,'@mail.com') AS email
FROM 
	e_commerce_order_details
GROUP BY 
    customer_id;
"""
df_rfm: pd.DataFrame = pd.read_sql(
    query,
    engine,
)

# Quick visual sanity check on a few random customers.
print(df_rfm.sample(5))


In [None]:
# Cut-off values for the segmentation step: the 0.80 quantile of each metric,
# rounded to two decimals.
quantile_level = 0.80

monetary_quantile = df_rfm["monetary"].quantile(quantile_level)
threshold_monetary = np.round(monetary_quantile, 2)
print(type(threshold_monetary))
print(f"Monetary threshold:\n{threshold_monetary}", "\n")

frequency_quantile = df_rfm["frequency"].quantile(quantile_level)
threshold_frequency = np.round(frequency_quantile, 2)
print(f"Frequency threshold:\n{threshold_frequency}", "\n")

aov_quantile = df_rfm["average_order_value"].quantile(quantile_level)
threshold_aov = np.round(aov_quantile, 2)
print(f"Average order value threshold:\n{threshold_aov}", "\n")


In [None]:
# Assign every customer to one campaign segment.
# np.select checks the conditions in order and takes the first one that
# matches, so a churned customer is always "churn_recovery"; customers that
# match no condition fall back to the default, "standard_promo".
is_churned = df_rfm["churn"] == 1

# Reaching at least one of the three thresholds is enough to count as a
# high-value customer.
is_high_value = (
    (df_rfm["monetary"] >= threshold_monetary)
    | (df_rfm["frequency"] >= threshold_frequency)
    | (df_rfm["average_order_value"] >= threshold_aov)
)

conditions = [
    is_churned,
    (df_rfm["churn"] == 0) & is_high_value,
]
print(type(conditions))

# Segment labels, positionally matched to `conditions`.
choice: list[str] = [
    "churn_recovery",
    "vip_loyalty",
]

df_rfm["segmentation"] = np.select(conditions, choice, default="standard_promo")
print(df_rfm)


In [None]:
# Prompt for customers in the "vip_loyalty" segment: thank-you e-mail with
# the 20% code VIP20.
vip_loyalty_prompt: str = r"""
    You are a Customer Success Manager for a premium brand. 
    Write a short, exclusive appreciation email to our VIP customer. 
    Thank them for their loyalty. 
    Offer a special 20% discount on their next purchase with code: VIP20. 
    Tone: Professional, grateful, and exclusive. 
    Keep it under 100 words. Do not use hashtags.
"""

# Prompt for customers in the "churn_recovery" segment: win-back e-mail with
# the 15% code MISSYOU15. The prompt text must not carry stray quote
# characters, because the whole string is sent to the model as-is.
churn_recovery_prompt: str = r"""
    You are a warm and friendly Customer Support Specialist. 
    Write a 'We Miss You' email to a customer who hasn't purchased in a while. 
    Convince them to come back. 
    Offer a 15% welcome back discount with code: MISSYOU15. 
    Tone: Empathetic, warm, casual. 
    Keep it under 80 words.
"""

# Prompt for customers in the "standard_promo" segment: promotional e-mail
# with the 10% code HELLO10.
standard_promo_prompt: str = r"""
    You are an energetic Marketing Copywriter. 
    Write a catchy promotional email to an active customer. 
    Encourage them to check out our new arrivals. 
    Offer a 10% discount on the next order with code: HELLO10. 
    Tone: Exciting, direct, sales-oriented. 
    Keep it under 80 words.
"""

# Lookup from segment label to prompt text; the keys are the same labels the
# segmentation step writes into df_rfm["segmentation"].
prompt: dict[str, str] = {
    "vip_loyalty": vip_loyalty_prompt,
    "churn_recovery": churn_recovery_prompt,
    "standard_promo": standard_promo_prompt,
}



# Methodology: AI Content Generation & API Integration
This module implements an LLM-based (Large Language Model) agent to automate personalized marketing communication. Below are the key technical decisions and the rationale behind the chosen data flow architecture.

1. Simulation Mode ("Dry Run") & Data Safety
For safety reasons and development purposes, no actual emails are transmitted to customer addresses.

Process: Generated content is stored directly within the pandas.DataFrame as a new feature (e.g., email_draft).

Persistence: Upon completion, the enriched dataset is exported to a .csv / .parquet file. This allows for a Human-in-the-Loop (HITL) approach, where drafts undergo Quality Assurance (QA) before any potential production deployment.

2. Sequential Execution Strategy (Single-Threaded)
The system utilizes the pandas .apply() method for row-by-row processing. While vectorization is typically preferred in Data Science for performance, sequential processing is the optimal architectural choice in this specific context.

We deliberately avoided Parallel Processing (e.g., joblib, multiprocessing) due to the constraints of the Hugging Face Serverless Inference API (Free Tier):

API Rate Limiting: The free tier imposes strict limits on the number of requests per second (QPS). Parallel execution would trigger simultaneous requests, resulting in immediate HTTP 429 (Too Many Requests) errors and token suspension.

I/O Bound Process: The bottleneck is not local CPU computation power, but Network Latency (waiting for the server to generate text).

Traffic Control: Using a sequential loop within .apply() allows for precise pacing via time.sleep(). This ensures the script respects the API's concurrency limits and maintains stability throughout the ETL process.

In [None]:

# Dry run on three randomly sampled customers: aiam.generate_email is called
# once per row with (row, client, prompt) and its result is stored in the
# "email_draft" column (presumably the generated e-mail text - verify against
# the helper).
df_test: pd.DataFrame = df_rfm.sample(3)
df_test["email_draft"] = df_test.apply(
    aiam.generate_email,
    axis=1,
    args=(client, prompt),
)

# Print each draft followed by a separator line for manual review.
for draft in df_test["email_draft"]:
    print(draft)
    print(50 * "-")

# The sample was only needed for this check.
del df_test