# DYNAMIC PRICING ENGINE - JETTO.ID
## Data Evaluation


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pickle
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Plot styling: the matplotlib style sheet is applied first and the seaborn
# palette is set afterwards (same order as before).
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("Set2")

# Notebook banner.
banner_rule = "=" * 60
print(banner_rule)
print("DYNAMIC PRICING ENGINE - JETTO.ID")
print("Fase 3: Evaluasi & Deployment")
print(banner_rule)

### load model & data

In [None]:

# ---------------------------------------------------------------------------
# 1. Load the trained model, its metadata and the evaluation datasets, then
#    rebuild the feature matrix and generate one prediction per product.
# ---------------------------------------------------------------------------
print("\n" + "="*60)
print("1. MEMUAT MODEL & DATA")
print("="*60)


def _mode_or(default):
    """Return an aggregator giving a group's first mode, or *default* if it has none."""
    def _pick(values):
        modes = values.mode()  # computed once per group
        return modes.iloc[0] if len(modes) > 0 else default
    return _pick


# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load artefacts written by this project's own training step.
with open('06_final_model.pkl', 'rb') as f:
    final_model = pickle.load(f)
print("✓ Berhasil memuat model final")

metadata = pd.read_csv('10_model_metadata.csv')
model_name = metadata['model_name'].iloc[0]
print(f"✓ Model: {model_name}")

abt = pd.read_csv('04_analytics_base_table.csv')
df_jetto = pd.read_csv('03_jetto_simulated_transactions.csv')
print(f"✓ Berhasil memuat dataset evaluasi")

# Per-product transaction features: most frequent season/day plus the mean
# profit margin and mean stock level.
transaction_features = df_jetto.groupby('product_id').agg({
    'season': _mode_or('normal'),
    'day_of_week': _mode_or('Monday'),
    'profit_margin_pct': 'mean',
    'stock_level': 'mean'
}).reset_index()
transaction_features.columns = ['product_id', 'dominant_season', 'dominant_day',
                                 'avg_profit_margin', 'avg_stock_level']

# Left join keeps every product of the analytics base table, including those
# without any transaction.
abt_full = abt.merge(transaction_features, on='product_id', how='left')

# SECURITY NOTE: same pickle caveat as above applies to these two artefacts.
with open('09_feature_columns.pkl', 'rb') as f:
    feature_columns = pickle.load(f)

with open('08_label_encoders.pkl', 'rb') as f:
    label_encoders = pickle.load(f)

# Every feature column whose name does not contain 'encoded' is treated as numeric.
numeric_features = [col for col in feature_columns if 'encoded' not in col]
categorical_features = ['category', 'dominant_season', 'dominant_day']

# Impute: column median for numeric features, the literal 'unknown' for categoricals.
abt_full[numeric_features] = abt_full[numeric_features].fillna(abt_full[numeric_features].median())
abt_full[categorical_features] = abt_full[categorical_features].fillna('unknown')

for col in categorical_features:
    le = label_encoders[col]
    # Build the label -> code lookup once per column instead of calling
    # le.transform() for every row. Assumes the encoders are scikit-learn
    # LabelEncoder objects, whose code for a label is its position in classes_.
    class_to_code = {label: code for code, label in enumerate(le.classes_)}
    # Labels not present in the encoder are mapped to -1.
    abt_full[f'{col}_encoded'] = abt_full[col].map(
        lambda value, lookup=class_to_code: lookup.get(str(value), -1)
    )

X_full = abt_full[feature_columns]
y_full = abt_full['jetto_price_mean']

# NOTE(review): predictions are made for every row of the analytics base table.
# If the model was fitted on some of these rows, the metrics computed from
# y_pred are in-sample and optimistic - confirm against the training step.
y_pred = final_model.predict(X_full)

print(f"\n Prediksi dihasilkan untuk {len(X_full)} produk")


### evaluasi model (menyeluruh)

In [None]:

# ---------------------------------------------------------------------------
# 2. Overall model evaluation: regression metrics, target check and a short
#    residual summary.
# ---------------------------------------------------------------------------
print("\n" + "=" * 60)
print("2. EVALUASI MODEL MENYELURUH")
print("=" * 60)

# Residuals are computed once and reused by the metrics below.
residuals = y_full - y_pred
abs_residuals = np.abs(residuals)

r2 = r2_score(y_full, y_pred)
mse = mean_squared_error(y_full, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_full, y_pred)
# The 1e-10 offset keeps the division defined when an actual price is 0.
mape = np.mean(np.abs(residuals / (y_full + 1e-10))) * 100

max_error = np.max(abs_residuals)
median_ae = np.median(abs_residuals)

metric_rule = "-" * 60

print("\n2.1 METRIK PERFORMA")
print(metric_rule)
print(f"R² Score (Koefisien Determinasi): {r2:.4f}")
print(f"RMSE (Root Mean Squared Error): £{rmse:.4f}")
print(f"MAE (Mean Absolute Error): £{mae:.4f}")
print(f"MAPE (Mean Absolute Percentage Error): {mape:.2f}%")
print(f"MSE (Mean Squared Error): {mse:.4f}")
print(f"Max Error: £{max_error:.4f}")
print(f"Median Absolute Error: £{median_ae:.4f}")

print(f"\n2.2 PENCAPAIAN TARGET")
print(metric_rule)
# `status` reflects the R² target only.
r2_target_met = r2 >= 0.85
if not r2_target_met:
    print(f" R² = {r2:.4f} < 0.85 (Di bawah target tapi dapat diterima)")
    status = "DAPAT DITERIMA"
else:
    print(f" R² = {r2:.4f} ≥ 0.85 (TARGET TERCAPAI)")
    status = "SUKSES"

mape_message = (
    f" MAPE = {mape:.2f}% < 10% (TARGET TERCAPAI)"
    if mape < 10
    else f" MAPE = {mape:.2f}% ≥ 10% (Di atas target)"
)
print(mape_message)

print(f"\n2.3 ANALISIS RESIDUAL")
print(metric_rule)
print(f"Mean Residual: £{np.mean(residuals):.4f}")
print(f"Std Residual: £{np.std(residuals):.4f}")
print(f"Min Residual: £{np.min(residuals):.4f}")
print(f"Max Residual: £{np.max(residuals):.4f}")

### analisis distribusi error

In [None]:

# ---------------------------------------------------------------------------
# 3. Error distribution: how many products fall into each percentage-error band.
# ---------------------------------------------------------------------------
print("\n" + "=" * 60)
print("3. ANALISIS DISTRIBUSI ERROR")
print("=" * 60)

percentage_errors = np.abs((y_full - y_pred) / (y_full + 1e-10)) * 100
n_products = len(percentage_errors)

# (label, boolean mask) for each band; lower bound inclusive, upper bound exclusive.
error_buckets = [
    ("Error 0-5%", percentage_errors < 5),
    ("Error 5-10%", (percentage_errors >= 5) & (percentage_errors < 10)),
    ("Error 10-20%", (percentage_errors >= 10) & (percentage_errors < 20)),
    ("Error >20%", percentage_errors >= 20),
]

print("\nDistribusi Persentase Error:")
for bucket_label, in_bucket in error_buckets:
    bucket_count = in_bucket.sum()
    print(f"  {bucket_label}: {bucket_count} produk ({bucket_count/n_products*100:.1f}%)")


### impact terhadap bisnis (simulasi)

In [None]:


# ---------------------------------------------------------------------------
# 4. Business-impact simulation: compare revenue/profit at the market price
#    ("static") with revenue/profit at the model's price ("dynamic").
#
# Assumptions visible in the calculation below:
#   * quantity_sold is identical under both prices (no demand response);
#   * unit cost is estimated as 65% of price_market.
# ---------------------------------------------------------------------------
print(f"\n{'='*60}")
print("4. SIMULASI DAMPAK BISNIS")
print("="*60)

print("\n4.1 SIMULASI KEUNTUNGAN (PROFIT)")
print("-" * 60)

# Attach the predicted price to every transaction via its product_id.
price_by_product = dict(zip(abt_full['product_id'], y_pred))
df_jetto['predicted_optimal_price'] = df_jetto['product_id'].map(price_by_product)

# Products without a prediction map to NaN. Summing would then silently drop
# those transactions from the dynamic totals while the static totals still
# include them, biasing the comparison. For those rows the dynamic scenario
# keeps the market price, so both scenarios cover the same transactions.
missing_prediction = df_jetto['predicted_optimal_price'].isna()
n_missing_prediction = int(missing_prediction.sum())
if n_missing_prediction > 0:
    print(f"  Catatan: {n_missing_prediction} transaksi tanpa prediksi model memakai harga pasar")
dynamic_price = df_jetto['predicted_optimal_price'].fillna(df_jetto['price_market'])

df_jetto['revenue_static'] = df_jetto['price_market'] * df_jetto['quantity_sold']
df_jetto['cost_estimate'] = df_jetto['price_market'] * 0.65
df_jetto['profit_static'] = df_jetto['revenue_static'] - (df_jetto['cost_estimate'] * df_jetto['quantity_sold'])

df_jetto['revenue_dynamic'] = dynamic_price * df_jetto['quantity_sold']
df_jetto['profit_dynamic'] = df_jetto['revenue_dynamic'] - (df_jetto['cost_estimate'] * df_jetto['quantity_sold'])

total_revenue_static = df_jetto['revenue_static'].sum()
total_revenue_dynamic = df_jetto['revenue_dynamic'].sum()
total_profit_static = df_jetto['profit_static'].sum()
total_profit_dynamic = df_jetto['profit_dynamic'].sum()

# Relative change of the dynamic scenario versus the static one, in percent.
revenue_improvement = (total_revenue_dynamic - total_revenue_static) / total_revenue_static * 100
profit_improvement = (total_profit_dynamic - total_profit_static) / total_profit_static * 100

print(f"Harga Statis (Rata-rata Pasar):")
print(f"  Total Revenue: £{total_revenue_static:,.2f}")
print(f"  Total Profit: £{total_profit_static:,.2f}")

print(f"\nHarga Dinamis (Optimasi Model):")
print(f"  Total Revenue: £{total_revenue_dynamic:,.2f}")
print(f"  Total Profit: £{total_profit_dynamic:,.2f}")

print(f"\n DAMPAK BISNIS:")
print(f"  Peningkatan Revenue: {revenue_improvement:+.2f}%")
print(f"  Peningkatan Profit: {profit_improvement:+.2f}%")
print(f"  Profit Tambahan: £{(total_profit_dynamic - total_profit_static):,.2f}")

if profit_improvement >= 15:
    print(f"\n Peningkatan Profit ≥ 15% (TARGET TERCAPAI)")
else:
    print(f"\n Peningkatan Profit {profit_improvement:.2f}% (Di bawah target 15%)")


### performa per kategori (analisis)

In [None]:

# ---------------------------------------------------------------------------
# 5. Per-category performance: product count, mean absolute error and mean
#    percentage error for each product category.
# ---------------------------------------------------------------------------
print("\n" + "=" * 60)
print("5. PERFORMA PER KATEGORI")
print("=" * 60)

eval_df = abt_full[['product_id', 'category', 'jetto_price_mean']].copy()
eval_df['predicted_price'] = y_pred
eval_df['absolute_error'] = (eval_df['jetto_price_mean'] - eval_df['predicted_price']).abs()
eval_df['percentage_error'] = eval_df['absolute_error'] / eval_df['jetto_price_mean'] * 100

# Aggregate, round to two decimals and list the best-predicted categories first.
category_performance = (
    eval_df.groupby('category')
    .agg(
        Jumlah_Produk=('jetto_price_mean', 'count'),
        Rata2_MAE=('absolute_error', 'mean'),
        Rata2_MAPE=('percentage_error', 'mean'),
    )
    .round(2)
    .sort_values('Rata2_MAPE')
)

print("\nPerforma Berdasarkan Kategori Produk:")
print(category_performance)


### insights fitur penting

In [None]:

# ---------------------------------------------------------------------------
# 6. Feature-importance insights. Only produced when the loaded model exposes
#    `feature_importances_`; otherwise nothing beyond the header is printed.
# ---------------------------------------------------------------------------
print("\n" + "=" * 60)
print("6. WAWASAN FITUR PENTING")
print("=" * 60)

if hasattr(final_model, 'feature_importances_'):
    feature_importance = pd.DataFrame(
        {'Fitur': feature_columns, 'Pentingnya': final_model.feature_importances_}
    )
    # Most important feature first.
    feature_importance = feature_importance.sort_values('Pentingnya', ascending=False)

    print("\n10 Fitur Paling Penting untuk Penentuan Harga:")
    print(feature_importance.head(10).to_string(index=False))

    print("\n KEY INSIGHTS:")
    top_feature = feature_importance['Fitur'].iloc[0]
    print(f"  1. Paling Penting: '{top_feature}' menggerakkan keputusan harga")
    print(f"  2. Fitur harga pasar sangat mempengaruhi harga optimal")
    print(f"  3. Pola transaksi dan metrik volume sangat krusial")

### perencanaan atau strategi deployment

In [None]:

# ---------------------------------------------------------------------------
# 7. Deployment strategy and recommendations (static report text).
#    Each entry is (sub-section heading, body); they are printed in order,
#    with a dashed rule between heading and body.
# ---------------------------------------------------------------------------
deployment_rule = "=" * 60
deployment_sub_rule = "-" * 60

print("\n" + deployment_rule)
print("7. STRATEGI DEPLOYMENT & REKOMENDASI")
print(deployment_rule)

deployment_sections = (
    ("\n7.1 PENDEKATAN DEPLOYMENT", """
Rekomendasi Strategi Deployment:

Fase 1: UJI COBA PILOT (Bulan 1-2)
  • Deploy model dalam "Mode Advisory" (Saran) untuk 20% produk
  • Jalankan A/B testing: Harga Statis vs Dinamis
  • Monitor KPI: Revenue, Margin Profit, Conversion Rate
  • Kumpulkan feedback dari tim marketing
  
Fase 2: PELUNCURAN BERTAHAP (Bulan 3-4)
  • Perluas ke 50% produk jika pilot sukses
  • Implementasikan update harga otomatis (harian)
  • Siapkan dashboard monitoring real-time
  • Fine-tune model dengan data produksi
  
Fase 3: DEPLOYMENT PENUH (Bulan 5-6)
  • Deploy ke 100% katalog produk
  • Aktifkan penyesuaian harga otomatis penuh
  • Terapkan batasan aturan bisnis:
    - Perubahan harga maksimal: ±15% per hari
    - Margin profit minimal: 20%
    - Integrasi monitoring harga kompetitor
"""),
    ("\n7.2 KEBUTUHAN TEKNIS", """
Infrastruktur yang Dibutuhkan:

1. Layanan API:
   • REST API untuk prediksi harga real-time
   • Input: Fitur produk
   • Output: Rekomendasi harga optimal
   • Waktu respons: <100ms (Target TERCAPAI jika dioptimasi)

2. Integrasi Database:
   • Koneksi ke database katalog produk
   • Update level inventaris real-time
   • Logging transaksi historis

3. Dashboard Monitoring:
   • Metrik performa model (R², MAPE, RMSE)
   • Metrik bisnis (Revenue, Profit, Konversi)
   • Sistem peringatan (alert) untuk anomali
   • Deteksi pergeseran model (model drift)

4. Pelatihan Ulang Otomatis (Automated Retraining):
   • Retraining model mingguan dengan data baru
   • Framework A/B testing
   • Kontrol versi untuk model
"""),
    ("\n7.3 ATURAN BISNIS & BATASAN", """
Batasan Harga yang Harus Diterapkan:

1. Batas Harga:
   • Harga Min: Biaya (Cost) + margin 20%
   • Harga Max: Rata-rata pasar + 30%
   • Lantai harga kompetitor: -10%

2. Frekuensi Update:
   • Produk permintaan tinggi: Update real-time
   • Produk standar: Update harian
   • Produk musiman: Review mingguan

3. Mekanisme Keamanan:
   • Persetujuan manusia untuk perubahan harga >20%
   • Pengembalian otomatis jika konversi turun >15%
   • Aturan harga khusus per kategori
"""),
)

for section_heading, section_text in deployment_sections:
    print(section_heading)
    print(deployment_sub_rule)
    print(section_text)


### monitoring framework

In [None]:

# ---------------------------------------------------------------------------
# 8. Monitoring framework (static report text): KPIs and alert thresholds.
# ---------------------------------------------------------------------------
monitoring_rule = "=" * 60
monitoring_sub_rule = "-" * 60

print("\n" + monitoring_rule)
print("8. KERANGKA KERJA MONITORING")
print(monitoring_rule)

kpi_text = """
KPI Teknis:
  • Model R² Score: Monitor mingguan, Target ≥ 0.85
  • Prediction MAPE: Monitor harian, Target < 10%
  • API Response Time: Monitor real-time, Target < 100ms
  • Prediction Coverage: % produk dengan prediksi, Target = 100%

KPI Bisnis:
  • Pertumbuhan Revenue: Perbandingan minggu-ke-minggu (WoW)
  • Profit Margin: Pelacakan level kategori
  • Conversion Rate: Dampak perubahan harga
  • Kepuasan Pelanggan: Skor feedback

KPI Kualitas Data:
  • Missing Data Rate: Target < 1%
  • Feature Drift: Uji statistik bulanan
  • Prediction Distribution: Bandingkan dengan data training
"""

alert_text = """
Peringatan Otomatis:
  KRITIS: Model R² turun di bawah 0.75
  KRITIS: MAPE melebihi 15%
  PERINGATAN: Profit margin turun >10%
  PERINGATAN: Waktu respons API >150ms
  INFO: Retraining mingguan selesai
"""

print("\n8.1 KEY PERFORMANCE INDICATORS (KPIs)")
print(monitoring_sub_rule)
print(kpi_text)

print("\n8.2 AMBANG BATAS PERINGATAN (ALERTS)")
print(monitoring_sub_rule)
print(alert_text)


### mitigasi risiko

In [None]:

# ---------------------------------------------------------------------------
# 9. Risk-mitigation strategy (static report text).
# ---------------------------------------------------------------------------
risk_rule = "=" * 60

risk_mitigation_text = """
1. Penurunan Performa Model:
   • Mitigasi: Retraining mingguan otomatis
   • Fallback: Kembali ke harga berbasis aturan jika R² < 0.70
   • Monitoring: Pelacakan performa real-time

2. Masalah Kualitas Data:
   • Mitigasi: Validasi input & cek kualitas data
   • Fallback: Gunakan rata-rata historis untuk fitur yang hilang
   • Monitoring: Laporan kualitas data harian

3. Volatilitas Pasar:
   • Mitigasi: Terapkan batas perubahan harga max (±15%)
   • Fallback: Hentikan update otomatis selama volatilitas tinggi
   • Monitoring: Dashboard analisis tren pasar

4. Kegagalan Teknis:
   • Mitigasi: Server API redundan, mekanisme caching
   • Fallback: Sistem cadangan harga statis
   • Monitoring: Cek kesehatan sistem setiap 5 menit

5. Resistensi Bisnis:
   • Mitigasi: Pelaporan transparan, uji coba pilot
   • Fallback: Mode advisory alih-alih otomatis
   • Monitoring: Pengumpulan feedback stakeholder
"""

print("\n" + risk_rule)
print("9. STRATEGI MITIGASI RISIKO")
print(risk_rule)

print(risk_mitigation_text)

### pengembangan masa depan

In [None]:

# ---------------------------------------------------------------------------
# 10. Development roadmap (static report text).
# ---------------------------------------------------------------------------
roadmap_rule = "=" * 60

roadmap_text = """
Jangka Pendek (3-6 bulan):
  • Integrasi data harga kompetitor real-time
  • Tambahkan fitur segmentasi pelanggan
  • Implementasi pengenalan pola musiman
  • Framework A/B testing untuk strategi harga

Jangka Menengah (6-12 bulan):
  • Model Deep Learning untuk pola kompleks
  • Optimasi multi-objektif (Revenue + Market Share)
  • Rekomendasi harga yang dipersonalisasi
  • Integrasi peramalan permintaan (Demand Forecasting)

Jangka Panjang (12+ bulan):
  • Reinforcement Learning untuk harga adaptif
  • Optimasi harga lintas-kategori
  • Rekomendasi harga bundle
  • Ekspansi pasar internasional
"""

print("\n" + roadmap_rule)
print("10. PETA JALAN PENGEMBANGAN (ROADMAP)")
print(roadmap_rule)

print(roadmap_text)


### export

In [None]:



# ---------------------------------------------------------------------------
# 11. Export: write the evaluation report, per-category performance,
#     per-product predictions and per-category business impact to CSV.
# ---------------------------------------------------------------------------
print(f"\n{'='*60}")
print("11. EKSPOR HASIL EVALUASI")
print("="*60)

# The overall status is derived from all three project targets (R², MAPE and
# profit uplift) so the exported report can also signal a failing evaluation.
# The R²-only `status` set during metric evaluation is never worse than
# "DAPAT DITERIMA", however low the scores are.
r2_target_met = r2 >= 0.85
mape_target_met = mape < 10
profit_target_met = profit_improvement >= 15
targets_met_count = sum([r2_target_met, mape_target_met, profit_target_met])
if targets_met_count == 3:
    overall_status = "SUKSES"
elif targets_met_count >= 2:
    overall_status = "DAPAT DITERIMA"
else:
    overall_status = "BUTUH PERBAIKAN"

evaluation_report = {
    'Nama_Model': model_name,
    'Tanggal_Evaluasi': pd.Timestamp.now().strftime('%Y-%m-%d'),
    'R2_Score': r2,
    'RMSE': rmse,
    'MAE': mae,
    'MAPE': mape,
    'Median_Absolute_Error': median_ae,
    'Max_Error': max_error,
    'Produk_Dievaluasi': len(X_full),
    'Peningkatan_Revenue_Pct': revenue_improvement,
    'Peningkatan_Profit_Pct': profit_improvement,
    'Profit_Tambahan_GBP': total_profit_dynamic - total_profit_static,
    'Target_R2_Tercapai': 'Ya' if r2_target_met else 'Tidak',
    'Target_MAPE_Tercapai': 'Ya' if mape_target_met else 'Tidak',
    'Target_Profit_Tercapai': 'Ya' if profit_target_met else 'Tidak',
    'Status_Keseluruhan': overall_status
}

# Single-row report.
eval_report_df = pd.DataFrame([evaluation_report])
eval_report_df.to_csv('11_evaluation_report.csv', index=False)
print("✓ Terekspor: 11_evaluation_report.csv")

# The index (category) is written as the first CSV column.
category_performance.to_csv('12_category_performance.csv')
print("✓ Terekspor: 12_category_performance.csv")

# Per-product actual vs. predicted price with absolute and percentage error.
prediction_results = abt_full[['product_id', 'product_name', 'category', 'jetto_price_mean']].copy()
prediction_results['predicted_price'] = y_pred
prediction_results['absolute_error'] = np.abs(prediction_results['jetto_price_mean'] - y_pred)
prediction_results['percentage_error'] = (prediction_results['absolute_error'] /
                                          prediction_results['jetto_price_mean'] * 100)
prediction_results.to_csv('13_prediction_results.csv', index=False)
print("✓ Terekspor: 13_prediction_results.csv")

# Static vs. dynamic revenue and profit totals per product category.
business_impact = df_jetto.groupby('product_category').agg({
    'revenue_static': 'sum',
    'revenue_dynamic': 'sum',
    'profit_static': 'sum',
    'profit_dynamic': 'sum'
}).reset_index()

business_impact['revenue_improvement_pct'] = ((business_impact['revenue_dynamic'] -
                                               business_impact['revenue_static']) /
                                              business_impact['revenue_static'] * 100)
business_impact['profit_improvement_pct'] = ((business_impact['profit_dynamic'] -
                                              business_impact['profit_static']) /
                                             business_impact['profit_static'] * 100)

business_impact.to_csv('14_business_impact_by_category.csv', index=False)
print("✓ Terekspor: 14_business_impact_by_category.csv")


### ringkasan / simpulan atau summary

In [None]:


# ---------------------------------------------------------------------------
# Final project summary: headline metrics, business impact, target tally,
# deliverables and next steps.
# ---------------------------------------------------------------------------
summary_rule = "=" * 60

# Each target is evaluated once and reused for the badges and the tally.
r2_ok = r2 >= 0.85
mape_ok = mape < 10
profit_ok = profit_improvement >= 15
extra_profit = total_profit_dynamic - total_profit_static


def _badge(target_met):
    """Return the check mark for a met target, the warning sign otherwise."""
    return '✅' if target_met else '⚠️'


print("\n" + summary_rule)
print("RINGKASAN AKHIR PROYEK")
print(summary_rule)

print("\nPERFORMA MODEL:")
print(f"  • R² Score: {r2:.4f} {_badge(r2_ok)}")
print(f"  • RMSE: £{rmse:.4f}")
print(f"  • MAPE: {mape:.2f}% {_badge(mape_ok)}")

print("\n DAMPAK BISNIS:")
print(f"  • Peningkatan Profit: {profit_improvement:+.2f}% {_badge(profit_ok)}")
print(f"  • Profit Tambahan: £{extra_profit:,.2f}")
print(f"  • Pertumbuhan Revenue: {revenue_improvement:+.2f}%")

print("\n PENCAPAIAN TARGET:")
targets_met = sum([r2_ok, mape_ok, profit_ok])
print(f"  • Target Tercapai: {targets_met}/3")
if targets_met == 3:
    status_line = "  • Status:  SEMUA TARGET TERCAPAI!"
elif targets_met >= 2:
    status_line = "  • Status:  DAPAT DITERIMA - Mayoritas target tercapai"
else:
    status_line = "  • Status:  BUTUH PERBAIKAN"
print(status_line)

print("\n HASIL SERAHAN (DELIVERABLES):")
for deliverable in (
    "Data Pasar Bersih",
    "Dataset Fitur Produk",
    "Transaksi Simulasi Jetto (50,000+)",
    "Analytics Base Table",
    "Model ML Terlatih (.pkl)",
    "Analisis Pentingnya Fitur",
    "Laporan Evaluasi",
    "Analisis Dampak Bisnis",
    "Dokumen Strategi Deployment",
):
    print(f"  • {deliverable}")

print("\n SIAP UNTUK DEPLOYMENT:")
for readiness_item in (
    "Artefak model tersimpan dan siap",
    "Strategi deployment terdokumentasi",
    "Kerangka monitoring terdefinisi",
    "Strategi mitigasi risiko tersedia",
):
    print(f"  • {readiness_item}")

print("\n" + summary_rule)
print("✓ PROYEK SELESAI DENGAN SUKSES!")
print(summary_rule)

next_steps_text = """
LANGKAH SELANJUTNYA:
1. Tinjau laporan evaluasi dengan pemangku kepentingan
2. Siapkan lingkungan uji coba pilot
3. Implementasikan layanan API untuk prediksi
4. Konfigurasi dashboard monitoring
5. Jadwalkan pipeline retraining model
6. Luncurkan program pilot (20% produk)

Terima kasih telah menggunakan Dynamic Pricing Engine!
Untuk pertanyaan atau dukungan, hubungi Tim Data Science.
"""
print(next_steps_text)

print(summary_rule)