# Datatune with Azure OpenAI

## Setup and Installation   
First, let's install the necessary packages:

In [None]:
!pip install datatune

## Import Required Libraries


In [None]:
import pandas as pd
import dask.dataframe as dd
from datatune.llm.llm import Azure
from datatune.core.map import Map
from datatune.core.filter import Filter
from datatune.core.op import finalize
import os

## Configure Azure OpenAI

Set up your Azure OpenAI credentials. Set the following environment variables in your shell before launching Jupyter, so that the notebook kernel can read them:


In [None]:
# The lines below are shell commands, not Python. Run them in your terminal
# before starting Jupyter; they are kept as comments here so that this cell
# does not raise a SyntaxError when the notebook is run top to bottom.
#
#   export OPENAI_API_KEY="your-azure-openai-key"
#   export AZURE_API_BASE="https://your-resource.openai.azure.com/"
#   export AZURE_API_VERSION="your api version"

In [None]:
# Read the Azure OpenAI connection settings from the environment.
# Each value is None when the corresponding variable is not set.
api_key = os.environ.get("OPENAI_API_KEY")
api_base = os.environ.get("AZURE_API_BASE")
api_version = os.environ.get("AZURE_API_VERSION")

# Create the Azure-backed LLM object that the Map/Filter steps below receive.
llm = Azure(
    model_name="gpt-35-turbo",  # or "gpt-4" if available
    api_key=api_key,
    api_base=api_base,
    api_version=api_version,
)

### Step 1: Create E-commerce Product Dataset
Let's create a realistic e-commerce product dataset with various product types and customer reviews:

In [None]:
# Build the sample catalogue column by column. Every list holds ten entries,
# one per product, and positions line up across columns.
data = {
    "product_id": [f"P{number:03d}" for number in range(1, 11)],  # P001 .. P010
    "product_name": [
        "Wireless Bluetooth Headphones with Noise Cancelling",
        "Organic Cotton Baby Onesie Set - 3 Pack",
        "Professional Chef Knife Set with Wooden Block",
        "Smart Fitness Tracker with Heart Rate Monitor",
        "Luxury Silk Pillowcase - Queen Size",
        "Portable Solar Phone Charger for Camping",
        "Artisan Coffee Beans - Ethiopian Single Origin",
        "Anti-Aging Vitamin C Serum with Hyaluronic Acid",
        "Wireless Gaming Mouse with RGB Lighting",
        "Eco-Friendly Bamboo Toothbrush Set of 4",
    ],
    "description": [
        "Premium over-ear headphones with active noise cancellation, 30-hour battery life, and crystal clear sound quality",
        "Made from 100% organic cotton, these soft onesies are perfect for sensitive baby skin. Machine washable and durable",
        "High-carbon stainless steel knives with ergonomic handles. Includes 8 knives and wooden storage block",
        "Track your workouts, monitor heart rate, count steps, and receive notifications. Waterproof design",
        "Luxurious mulberry silk pillowcase that reduces hair frizz and prevents wrinkles. Hypoallergenic",
        "Foldable solar panel charger with dual USB ports. Weather-resistant and perfect for outdoor adventures",
        "Single-origin beans from Ethiopian highlands. Medium roast with notes of chocolate and berries",
        "Clinical-strength serum with 20% Vitamin C and hyaluronic acid. Reduces fine lines and brightens skin",
        "High-precision gaming mouse with customizable DPI settings and programmable RGB lighting effects",
        "Sustainable bamboo toothbrushes with soft bristles. Biodegradable and environmentally friendly",
    ],
    "price": [89.99, 24.99, 149.99, 79.99, 39.99, 59.99, 18.99, 34.99, 49.99, 12.99],
    "rating": [4.5, 4.8, 4.2, 4.6, 4.7, 4.1, 4.9, 4.3, 4.4, 4.6],
    "reviews_text": [
        "Amazing sound quality and the noise cancellation works perfectly on flights. Battery lasts all day!",
        "So soft and comfortable for my baby. Great quality organic cotton and holds up well after washing.",
        "Sharp knives that make cooking a pleasure. The wooden block looks great on my counter too.",
        "Accurate fitness tracking and the heart rate monitor is very reliable. Love the sleep tracking feature.",
        "My hair is so much smoother since switching to this pillowcase. Worth every penny!",
        "Charged my phone twice during a 3-day camping trip. Compact and works even on cloudy days.",
        "Best coffee I have ever tasted! Rich flavor with perfect balance. Will definitely reorder.",
        "Noticed visible improvement in my skin texture after just 2 weeks. No irritation despite sensitive skin.",
        "Perfect for gaming with great precision. The RGB lighting is customizable and looks awesome.",
        "Love that these are eco-friendly! Bristles are just the right softness and they last long.",
    ],
    "stock_quantity": [45, 123, 12, 67, 89, 23, 156, 78, 34, 201],
}

df = pd.DataFrame(data)

# Preview only the short columns so the long text fields do not flood the output.
print("Original E-commerce Dataset:")
print(df.head()[["product_id", "product_name", "price", "rating"]])

### Step 2: Convert to Dask DataFrame

In [None]:
# Convert to dask dataframe for Datatune.
# This cell rebinds `df` from a pandas DataFrame to a Dask DataFrame, so the
# conversion is guarded: if the cell is re-run, `df` is already a Dask frame
# and must not be handed to `dd.from_pandas` a second time.
if isinstance(df, pd.DataFrame):
    df = dd.from_pandas(df, npartitions=3)
print(f"Dask DataFrame created with {df.npartitions} partitions")

### Step 3: Map Operation - Product Categorization
Use AI to automatically assign each product a main category, a subcategory, and a target demographic:

In [None]:
# Map step: request three new fields per product, derived from its name and
# description.
categorization_map = Map(
    prompt=(
        "Analyze the product name and description to determine the main category, "
        "subcategory, and target demographic for this product."
    ),
    output_fields=["main_category", "subcategory", "target_demographic"],
)(llm, df)

print("Running product categorization...")
# Compute first, then pass the computed result through datatune's finalize().
categorized_df = categorization_map.compute()
final_categorized = finalize(categorized_df)

print("\nProducts with AI-generated categories:")
print(
    final_categorized[
        ["product_name", "main_category", "subcategory", "target_demographic"]
    ].head()
)

### Step 4: Map Operation - Review Sentiment Analysis
Analyze customer review sentiment and extract key themes:


In [None]:
# Map step: derive sentiment, compliments and concerns from each review,
# starting from the categorized products.
sentiment_map = Map(
    prompt=(
        "Analyze the customer review text and determine the overall sentiment "
        "(positive/negative/neutral), main compliments mentioned, and any concerns raised."
    ),
    output_fields=["sentiment", "main_compliments", "concerns"],
)(llm, final_categorized)

print("Running sentiment analysis on reviews...")
# Compute first, then pass the computed result through datatune's finalize().
sentiment_df = sentiment_map.compute()
final_sentiment = finalize(sentiment_df)

print("\nProducts with sentiment analysis:")
print(
    final_sentiment[
        ["product_name", "sentiment", "main_compliments", "concerns"]
    ].head()
)

### Step 5: Filter Operation - High-Value Products
Filter products based on business criteria - high-value items with good ratings:

In [None]:
# Filter step: the selection rule (price and rating thresholds) is expressed
# in natural language and applied to the sentiment-enriched products.
premium_filter = Filter(
    prompt=(
        "Keep only products that are priced above $40 AND have a rating above 4.3. "
        "These are our premium offerings."
    ),
)(llm, final_sentiment)

print("Filtering for premium products (>$40 and rating >4.3)...")
# Compute first, then pass the computed result through datatune's finalize().
premium_df = premium_filter.compute()
final_premium = finalize(premium_df)

print(f"\nPremium Products Found: {len(final_premium)}")
print(final_premium[["product_name", "price", "rating", "main_category"]])

### Step 6: Filter Operation - Inventory Management
Filter products that need inventory attention:

In [None]:
# Filter step: keep well-rated products whose stock is running low. Like the
# premium filter, this starts from the sentiment-enriched products.
low_stock_filter = Filter(
    prompt=(
        "Keep only products that have stock quantity below 50 AND rating above 4.0. "
        "These popular items need restocking soon."
    ),
)(llm, final_sentiment)

print("Filtering for popular products with low stock...")
# Compute first, then pass the computed result through datatune's finalize().
low_stock_df = low_stock_filter.compute()
final_low_stock = finalize(low_stock_df)

print(f"\nProducts needing restock: {len(final_low_stock)}")
print(final_low_stock[["product_name", "stock_quantity", "rating", "price"]])

### Step 7: Map Operation - Marketing Insights
Generate marketing insights and recommended strategies:

In [None]:
# Map step: ask for three marketing-oriented fields per product. The prompt is
# a multi-line string; the numbered lines carry a four-space indent.
marketing_map = Map(
    prompt=(
        "Based on the product details, customer reviews, and current performance, suggest:\n"
        "    1. The best marketing channel for this product\n"
        "    2. Key selling points to highlight in ads\n"
        "    3. Potential bundle opportunities with other products"
    ),
    output_fields=["best_marketing_channel", "key_selling_points", "bundle_opportunities"],
)(llm, final_sentiment)

print("Generating marketing insights...")
# Compute first, then pass the computed result through datatune's finalize().
marketing_df = marketing_map.compute()
final_marketing = finalize(marketing_df)

print("\nMarketing insights for products:")
# Show the generated fields for the first three products only.
for idx, row in final_marketing.head(3).iterrows():
    print(f"\nProduct: {row['product_name']}")
    print(f"Marketing Channel: {row['best_marketing_channel']}")
    print(f"Key Selling Points: {row['key_selling_points']}")
    print(f"Bundle Ideas: {row['bundle_opportunities']}")

### Step 8: Map Operation - Competitive Pricing Analysis
Assess whether each product's price is competitive and generate pricing recommendations:

In [None]:
# Map step: ask for a pricing assessment of each product, starting from the
# marketing-enriched products. The prompt is a two-line string; the first line
# ends with a space before the newline and the second is indented four spaces.
pricing_map = Map(
    prompt=(
        "Analyze this product's price relative to its features, quality (based on rating and reviews), and category. \n"
        "    Determine if the price is competitive, overpriced, or underpriced. Suggest pricing adjustments if needed."
    ),
    output_fields=["pricing_assessment", "suggested_price_range", "pricing_strategy"],
)(llm, final_marketing)

print("Running pricing analysis...")
# Compute first, then pass the computed result through datatune's finalize().
pricing_df = pricing_map.compute()
final_pricing = finalize(pricing_df)

print("\nPricing analysis results:")
print(
    final_pricing[
        ["product_name", "price", "pricing_assessment", "suggested_price_range"]
    ].head()
)

## Summary

In [None]:
# Print a four-part text report built from the results of the earlier steps.
print("=== E-COMMERCE ANALYTICS SUMMARY ===\n")

# Part 1: number of products per AI-assigned main category.
print("1. PRODUCT CATEGORIES:")
category_summary = (
    final_categorized
    .groupby("main_category")
    .size()
    .reset_index(name="count")
)
print(category_summary)

# Part 2: number of products per sentiment label.
print("\n2. SENTIMENT DISTRIBUTION:")
sentiment_summary = (
    final_sentiment
    .groupby("sentiment")
    .size()
    .reset_index(name="count")
)
print(sentiment_summary)

# Part 3: row counts of the two filtered result sets.
print("\n3. INVENTORY ALERTS:")
print(f"   - Premium products: {len(final_premium)} items")
print(f"   - Low stock items: {len(final_low_stock)} items needing restock")

# Part 4: column means taken from the last frame in the pipeline.
print("\n4. AVERAGE METRICS:")
print(f"   - Average price: ${final_pricing['price'].mean():.2f}")
print(f"   - Average rating: {final_pricing['rating'].mean():.2f}")
print(f"   - Average stock: {final_pricing['stock_quantity'].mean():.0f} units")