In [1]:
from dotenv import load_dotenv
load_dotenv()

from huggingface_hub import InferenceClient
import os
import numpy as np
import pandas as pd
from sqlalchemy import create_engine, types, text, Engine
import time

from src import database_methods as dbm
from src import ai_agent_methods as aiam


In [2]:
# Build the Hugging Face inference client. The API token is read from the
# HF_TOKEN environment variable; a missing variable raises KeyError here.
client: InferenceClient = InferenceClient(
    api_key=os.environ["HF_TOKEN"],
)

# Smoke test: send one trivial chat request so that a bad token, an unknown
# model or an exhausted quota is noticed before any real work starts.
try:
    completion = client.chat.completions.create(
        model="HuggingFaceH4/zephyr-7b-beta:featherless-ai",
        messages=[
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ],
    )

    print(f"Model answer (What is the capital of France?):\n{completion.choices[0].message.content}")
except Exception as e:
    # Include the underlying error: without it the failure cannot be diagnosed.
    print(f"Model unsuccessfully loaded. Error: {e}")

Model answer (What is the capital of France?):
The capital city of France is Paris (French: Paris). The country's other major cities include Marseille, Lyon, and Nice, but Paris is the most populous and serves as the political, economic, and cultural center of France.


## Because the source data contains no customer e-mail addresses, addresses are generated specifically for this project by concatenating the prefix "customer_", the customer_id, and the suffix "@mail.com".

## These addresses are generated in the SQL query together with the other parameters the AI Agent needs to e-mail two groups of customers: those who have been inactive for some time, and active customers. Each group is offered a store discount that encourages further purchases.

In [None]:
# Create the database engine through the project helper.
try:
    engine: Engine = dbm.get_db_engine()
    print("DB Engine successfuly created")
except Exception as e:
    print(f"DB Engine creation error: {e}")
    # Without an engine the query below would fail with an unrelated
    # NameError, so stop here and surface the real cause instead.
    raise

# One row per customer, aggregated from e_commerce_order_details:
#   monetary            - SUM(quantity * price)
#   frequency           - number of distinct order_id values
#   recency             - latest date in the whole table minus the customer's
#                         latest date (presumably in days - confirm the column type)
#   average_order_value - monetary / frequency, rounded to 2 decimals
#   churn               - 1 when recency >= 90, otherwise 0
#   email               - synthetic address 'customer_<customer_id>@mail.com'
query: str = """
SELECT 
    customer_id, 
    SUM(quantity * price) AS monetary, 
    COUNT (DISTINCT order_id) AS frequency,
    ((SELECT MAX(date) FROM e_commerce_order_details) - MAX(date)) AS recency, 
    ROUND((SUM(quantity * price) / (COUNT (DISTINCT order_id))), 2) AS average_order_value, 
	CASE
		WHEN 
			((SELECT MAX(date) FROM e_commerce_order_details) - MAX(date)) >= 90
		THEN
			1
		ELSE
			0
	END AS churn, 
	CONCAT('customer_', customer_id,'@mail.com') AS email
FROM 
	e_commerce_order_details
GROUP BY 
    customer_id;
"""
df_rfm: pd.DataFrame = pd.read_sql(
    query, 
    engine
)

# Show a small random sample rather than the whole frame.
print(df_rfm.sample(5))
del query


DB Engine successfuly created
     customer_id  monetary  frequency  recency  average_order_value  churn  \
3337       16491   5648.95          1       46              5648.95      0   
4468       17948   1843.20          1      147              1843.20      1   
198        12504   1330.34          2       18               665.17      0   
2114       14954    858.31          1       12               858.31      0   
3921       17239   1929.26          1      312              1929.26      1   

                        email  
3337  customer_16491@mail.com  
4468  customer_17948@mail.com  
198   customer_12504@mail.com  
2114  customer_14954@mail.com  
3921  customer_17239@mail.com  


In [None]:
# 80th-percentile cut-offs (rounded to 2 decimals) for the three value metrics.
rfm_quantiles = {
    metric: np.round(df_rfm[metric].quantile(0.80), 2)
    for metric in ("monetary", "frequency", "average_order_value")
}

threshold_monetary: np.float64 = rfm_quantiles["monetary"]
threshold_frequency: np.float64 = rfm_quantiles["frequency"]
threshold_aov: np.float64 = rfm_quantiles["average_order_value"]
del rfm_quantiles

# Report the thresholds in the same order they are used later.
print(f"Monetary threshold:\n{threshold_monetary}", "\n")
print(f"Frequency threshold:\n{threshold_frequency}", "\n")
print(f"Average order value threshold:\n{threshold_aov}", "\n")


<class 'numpy.float64'>
Monetary threshold:
14660.86 

Frequency threshold:
5.0 

Average order value threshold:
3616.08 



In [None]:
# Boolean masks for the segments. np.select takes the first matching
# condition, so churned customers are labelled before the VIP check.
is_churned = df_rfm["churn"].eq(1)
is_high_value = (
    df_rfm["monetary"].ge(threshold_monetary)
    | df_rfm["frequency"].ge(threshold_frequency)
    | df_rfm["average_order_value"].ge(threshold_aov)
)

conditions: list[pd.Series] = [
    is_churned,
    df_rfm["churn"].eq(0) & is_high_value,
]
choice: list[str] = ["churn_recovery", "vip_loyalty"]

# Customers matching neither mask fall back to the standard promotion.
df_rfm["segmentation"] = np.select(conditions, choice, default="standard_promo")
print(df_rfm)
del threshold_monetary, threshold_frequency, threshold_aov, choice, is_churned, is_high_value


<class 'list'>
     customer_id  monetary  frequency  recency  average_order_value  churn  \
0          12004   1509.60          1      227              1509.60      1   
1          12006     24.76          1      218                24.76      1   
2          12008   5689.57          1      276              5689.57      1   
3          12013     69.96          1      359                69.96      1   
4          12024    149.52          1      176               149.52      1   
...          ...       ...        ...      ...                  ...    ...   
4713       18280    623.26          1      277               623.26      1   
4714       18281    576.58          1      180               576.58      1   
4715       18282   1044.86          2        7               522.43      0   
4716       18283  12114.61         16        3               757.16      0   
4717       18287  18139.56          3       42              6046.52      0   

                        email    segmentation  


In [6]:
# Prompt templates, one per customer segment. Each fixes the persona, the
# goal of the e-mail, the discount code, the tone and a word limit.

# Non-churned customers above at least one 80th-percentile threshold.
vip_loyalty_prompt: str = r"""
    You are a Customer Success Manager for a premium brand. 
    Write a short, exclusive appreciation email to our VIP customer. 
    Thank them for their loyalty. 
    Offer a special 20% discount on their next purchase with code: VIP20. 
    Tone: Professional, grateful, and exclusive. 
    Keep it under 100 words. Do not use hashtags.
"""

# Customers flagged as churned. The stray trailing double quote that used to
# follow the last sentence was removed so it is no longer sent to the model.
churn_recovery_prompt: str = r"""
    You are a warm and friendly Customer Support Specialist. 
    Write a 'We Miss You' email to a customer who hasn't purchased in a while. 
    Convince them to come back. 
    Offer a 15% welcome back discount with code: MISSYOU15. 
    Tone: Empathetic, warm, casual. 
    Keep it under 80 words.
"""

# Everyone else (the default segment).
standard_promo_prompt: str = r"""
    You are an energetic Marketing Copywriter. 
    Write a catchy promotional email to an active customer. 
    Encourage them to check out our new arrivals. 
    Offer a 10% discount on the next order with code: HELLO10. 
    Tone: Exciting, direct, sales-oriented. 
    Keep it under 80 words.
"""

# Lookup table keyed by the values of the "segmentation" column.
prompt: dict[str,str] = {
    "vip_loyalty": vip_loyalty_prompt, 
    "churn_recovery": churn_recovery_prompt, 
    "standard_promo": standard_promo_prompt
}



# Methodology: AI Content Generation & API Integration
This module implements an LLM-based (Large Language Model) agent to automate personalized marketing communication. Below are the key technical decisions and the rationale behind the chosen data flow architecture.

1. Simulation Mode ("Dry Run") & Data Safety
For safety reasons and development purposes, no actual emails are transmitted to customer addresses.

Process: Generated content is stored directly within the pandas.DataFrame as a new feature (e.g., email_draft).

Persistence: Upon completion, the enriched dataset is exported to a .csv file. This allows for a Human-in-the-Loop (HITL) approach, where drafts undergo Quality Assurance (QA) before any potential production deployment.

2. Sequential Execution Strategy (Single-Threaded)
The system utilizes the pandas .apply() method for row-by-row processing. While vectorization is typically preferred in Data Science for performance, sequential processing is the optimal architectural choice in this specific context.

We deliberately avoided Parallel Processing (e.g., joblib, multiprocessing) due to the constraints of the Hugging Face Serverless Inference API (Free Tier):

API Rate Limiting: The free tier imposes strict limits on the number of requests per second (QPS). Parallel execution would trigger simultaneous requests, resulting in immediate HTTP 429 (Too Many Requests) errors and token suspension.

I/O Bound Process: The bottleneck is not local CPU computation power, but Network Latency (waiting for the server to generate text).

Traffic Control: Using a sequential loop within .apply() allows for precise pacing via time.sleep(). This ensures the script respects the API's concurrency limits and maintains stability throughout the ETL process.

In [None]:
# Dry run: generate drafts for three randomly sampled customers before
# spending API quota on the whole frame.
df_test: pd.DataFrame = df_rfm.sample(3)
try:
    df_test["email_draft"] = df_test.apply(aiam.generate_email, axis=1, args=(client, prompt))
except Exception as e:
    print(f"Email drafts unsuccessfully generated. Error: {e}")
else:
    # Only print drafts when the column was actually created; otherwise the
    # lookup of "email_draft" would raise a KeyError after a failure.
    print("Email drafts successfully generated")
    for draft in df_test["email_draft"]:
        print(draft)
        print(50 * "-")
del df_test

Subject: We Miss You at Wojciech Kiełbowicz & Co

Dear Valued Customer,

We hope this email finds you well. It's been a while since you last shopped with us at Wojciech Kiełbowicz & Co, and we've been missing you! We understand that life gets busy, but we want you to know that we're here and ready to welcome you back with open arms. As a token of our appreciation, we're excited to offer you a special discount as a way to say thank you for choosing us in the past. Use the code MISSYOU15 at checkout to enjoy 15% off your next purchase.

We value your loyalty and would love to have you back in our store. Our team has been working hard to bring you more of the unique and high-quality items you know and love, and we'd be delighted to help you find your next favorite piece. Whether it's a cozy sweater for fall or a stylish accessory, we have you covered. Our collection features the latest trends and timeless classics that will elevate your wardrobe.

We're committed to providing exceptional 

In [9]:
# Full run: generate one draft per customer, row by row, then export the
# drafts for manual review.
# NOTE(review): the recorded output shows per-row "Error:" messages (HTTP 402)
# without this cell's except branch firing, which suggests generate_email
# handles API failures itself. Verify what it returns in that case before
# trusting the exported drafts.
try:
    print("Email drafts are being generated...")
    df_rfm["email_draft"] = df_rfm.apply(aiam.generate_email, axis=1, args=(client,prompt))
except Exception as e:
    print(f"Email drafts unsuccessfully generated. Error: {e}")
else:
    print("Email drafts successfully generated")

    # Export only after a successful run; after a failure the "email_draft"
    # column does not exist and selecting it would raise a KeyError.
    cols: list[str] = [
        "customer_id", 
        "email_draft"
    ]

    df_rfm[cols].to_csv("../data/processed/marketing_campaign_drafts.csv", index=False)
    del cols

Email drafts are being generated...
Error:
Client error '402 Payment Required' for url 'https://router.huggingface.co/v1/chat/completions' (Request ID: Root=1-698c8d6f-5dd06b0c5da8d33d49fd61c1;2246bd95-d97a-4138-a87e-06073a7d0ad6)
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/402

Credit balance is depleted: purchase pre-paid credits to continue using Inference Providers with your account. Alternatively, subscribe to PRO to get 20x more included usage.
Error:
Client error '402 Payment Required' for url 'https://router.huggingface.co/v1/chat/completions' (Request ID: Root=1-698c8d6f-5a08713575115bf51166b17a;ce501ce9-f9aa-4eaa-8e39-73e783f7ae7f)
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/402

Credit balance is depleted: purchase pre-paid credits to continue using Inference Providers with your account. Alternatively, subscribe to PRO to get 20x more included usage.
Error:
Client error '402 Payment Required' f

KeyboardInterrupt: 