## Setup

In [1]:
import pandas as pd
import random
import uuid
from datetime import datetime, timedelta, date
from faker import Faker

# Seed both random sources so that Restart & Run All regenerates the same data.
# The stdlib `random` module and Faker keep separate generators, so each one
# has to be seeded on its own.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
Faker.seed(RANDOM_SEED)

# Initialize Faker (using Indonesian locale for more contextual data)
fake = Faker('id_ID')

# --- Configuration: target sizes of the generated sheets ---
NUM_CABANG = 100
NUM_KARYAWAN = 2000
NUM_TRANSAKSI_HARIAN_RECORDS = 10000
# A "transaction event" is one shopping instance; each one owns one or more
# of the transaksi_harian line-item records counted above.
NUM_UNIQUE_TRANSACTION_EVENTS = 4000

# --- Master Data ---
# Job titles an employee can be assigned.
JABATAN_LIST = [
    'Kasir',
    'Staff Gudang',
    'Supervisor Toko',
    'Asisten Manajer',
    'Manajer Toko',
    'Pramuniaga',
    'Admin',
]

# Catalogue of items; every item always sells at the unit price listed here.
ITEMS_PRICE_MAP = {
    'Beras Premium 5kg': 65000,
    'Minyak Goreng Refill 2L': 32000,
    'Gula Pasir 1kg': 14000,
    'Telur Ayam (per kg)': 25000,
    'Roti Tawar': 15000,
    'Susu UHT Cokelat 1L': 18000,
    'Kopi Instan Sachet (isi 10)': 12000,
    'Teh Celup Kotak (isi 25)': 8000,
    'Mie Instan Goreng (5 bungkus)': 13000,
    'Sabun Mandi Batang': 3000,
    'Shampoo Botol 170ml': 22000,
    'Pasta Gigi 100g': 9000,
    'Deterjen Bubuk 800g': 17000,
    'Air Mineral Galon 19L': 20000,
    'Biskuit Kaleng': 35000,
}
# Item names in catalogue order, handy for random.choice().
ITEM_NAMES = list(ITEMS_PRICE_MAP)

# Transactions are dated within this closed interval.
TRANSACTION_START_DATE = date(2024, 1, 1)
TRANSACTION_END_DATE = date(2024, 12, 31)
# Whole days between the two endpoints (difference of proleptic ordinals).
TRANSACTION_DATE_RANGE_DAYS = (
    TRANSACTION_END_DATE.toordinal() - TRANSACTION_START_DATE.toordinal()
)

# Echo the targets so the cell output documents the run's configuration.
print(
    "Setup complete. Faker initialized for 'id_ID'.",
    f"Target Cabang: {NUM_CABANG}",
    f"Target Karyawan: {NUM_KARYAWAN}",
    f"Target Transaksi Harian Records: {NUM_TRANSAKSI_HARIAN_RECORDS}",
    f"Target Unique Transaction Events: {NUM_UNIQUE_TRANSACTION_EVENTS}",
    sep="\n",
)

Setup complete. Faker initialized for 'id_ID'.
Target Cabang: 100
Target Karyawan: 2000
Target Transaksi Harian Records: 10000
Target Unique Transaction Events: 4000


## Generate `Cabang` Sheet

In [None]:
# Generate Cabang (branch) master data: one record per branch,
# with ids running from CB001 up to CB{NUM_CABANG}.

cabang_data = []
for nomor in range(1, NUM_CABANG + 1):
    # The branch name is built from a random city name.
    label_cabang = "Cabang " + fake.city_name()
    # About 30% of the branches get an extra suffix for variety.
    if random.random() < 0.3:
        akhiran = random.choice(["Utama", "Sentra", "Express", "Plus"])
        label_cabang = f"{label_cabang} {akhiran}"

    # Address parts are drawn in this order: street, city, administrative unit.
    jalan = fake.street_address()
    kota = fake.city()
    wilayah = fake.administrative_unit()

    cabang_data.append(dict(
        id_cabang=f"CB{nomor:03d}",
        nama_cabang=label_cabang,
        lokasi=f"{jalan}, {kota}, {wilayah}",
        kontak_cabang=fake.phone_number(),
    ))

df_cabang = pd.DataFrame(cabang_data)

# Preview and row count, emitted as three lines of plain text.
print(
    "--- Cabang Data (First 5 Rows) ---",
    df_cabang.head(),
    f"\nGenerated {len(df_cabang)} cabang records.",
    sep="\n",
)

AttributeError: 'Generator' object has no attribute 'province'

In [None]:
# Rich display of the first five branch rows.
df_cabang.head(n=5)

## Generate `Karyawan` Sheet

In [None]:
# Generate Karyawan (employee) master data: ids KR0001 .. KR{NUM_KARYAWAN},
# each with a random name, a random job title and a random existing branch.
karyawan_columns = ["id_karyawan", "nama_karyawan", "jabatan", "id_cabang"]
karyawan_data = []

if df_cabang.empty:
    print("Cabang data is empty. Please generate cabang data first.")
    # Still define df_karyawan (empty, but with its schema) so that the cells
    # below can test `df_karyawan.empty` instead of failing with a NameError.
    df_karyawan = pd.DataFrame(columns=karyawan_columns)
else:
    list_id_cabang = df_cabang['id_cabang'].tolist()
    for i in range(1, NUM_KARYAWAN + 1):
        karyawan_data.append({
            "id_karyawan": f"KR{i:04d}",  # e.g., KR0001, ..., KR2000
            "nama_karyawan": fake.name(),
            "jabatan": random.choice(JABATAN_LIST),
            # Employees are only ever assigned to a branch that exists.
            "id_cabang": random.choice(list_id_cabang),
        })

    df_karyawan = pd.DataFrame(karyawan_data, columns=karyawan_columns)
    print("--- Karyawan Data (First 5 Rows) ---")
    # A bare `df_karyawan.head()` inside this branch is evaluated but never
    # displayed by the notebook, so the preview is printed explicitly.
    print(df_karyawan.head())
    print(f"\nGenerated {len(df_karyawan)} karyawan records.")


In [None]:
# Pair every employee with their branch as (id_karyawan, id_cabang) tuples;
# the list is left empty when there are no employees.
karyawan_cabang_map = (
    []
    if df_karyawan.empty
    else list(zip(df_karyawan['id_karyawan'], df_karyawan['id_cabang']))
)

## Generate `Transaksi_Harian` Sheet

In [None]:
def _build_line_item(event):
    """Return one transaksi_harian row (a single purchased item) for `event`.

    `event` is one of the dicts stored in `unique_transaction_events`. Its
    id_transaksi, id_cabang, id_karyawan and tanggal are copied onto the row so
    that every line item agrees with its parent transaction. The item is drawn
    at random from ITEM_NAMES and priced from ITEMS_PRICE_MAP.
    """
    nama_barang = random.choice(ITEM_NAMES)
    harga_barang = ITEMS_PRICE_MAP[nama_barang]
    qty = random.randint(1, 5)  # Max 5 units per line item
    return {
        # uuid4 ids are random and are not governed by random.seed().
        "id_transaksi_harian": str(uuid.uuid4()),
        "id_transaksi": event["id_transaksi"],
        "id_cabang": event["id_cabang"],
        "id_karyawan": event["id_karyawan"],
        "tanggal": event["tanggal"],
        "nama_barang": nama_barang,
        "qty": qty,
        "harga_barang": harga_barang,
        "total_transaksi": qty * harga_barang,
    }


transaksi_harian_columns = [
    "id_transaksi_harian", "id_transaksi", "id_cabang", "id_karyawan",
    "tanggal", "nama_barang", "qty", "harga_barang", "total_transaksi",
]
transaksi_harian_data = []

if not karyawan_cabang_map:
    print("Karyawan data is empty or map not created. Please generate karyawan data first.")
    # Still define the frame (empty, with its schema) so the export cell can
    # test `df_transaksi_harian.empty` instead of failing with a NameError.
    df_transaksi_harian = pd.DataFrame(columns=transaksi_harian_columns)
else:
    # 1. Generate unique transaction events (core info shared by their items)
    unique_transaction_events = []
    for i in range(1, NUM_UNIQUE_TRANSACTION_EVENTS + 1):
        # The employee and the branch are picked together so they always match.
        id_karyawan, id_cabang = random.choice(karyawan_cabang_map)

        # randint is inclusive on both ends, so the end date can be drawn too.
        random_days = random.randint(0, TRANSACTION_DATE_RANGE_DAYS)
        tanggal_transaksi = TRANSACTION_START_DATE + timedelta(days=random_days)

        unique_transaction_events.append({
            "id_transaksi": f"TRX{i:05d}",  # e.g., TRX00001
            "id_karyawan": id_karyawan,
            "id_cabang": id_cabang,
            "tanggal": tanggal_transaksi.isoformat()
        })

    # 2. Generate line items until NUM_TRANSAKSI_HARIAN_RECORDS is reached.
    # First pass: give every event one item, stopping early if the target is
    # smaller than the number of events.
    for event in unique_transaction_events:
        if len(transaksi_harian_data) >= NUM_TRANSAKSI_HARIAN_RECORDS:
            break
        transaksi_harian_data.append(_build_line_item(event))

    # Second pass: attach extra items to randomly chosen existing events.
    # The emptiness check avoids random.choice() failing on an empty list.
    while unique_transaction_events and len(transaksi_harian_data) < NUM_TRANSAKSI_HARIAN_RECORDS:
        transaksi_harian_data.append(_build_line_item(random.choice(unique_transaction_events)))

    df_transaksi_harian = pd.DataFrame(transaksi_harian_data, columns=transaksi_harian_columns)
    # Shuffle the rows so line items of one transaction are not adjacent. The
    # shuffle seed is drawn from `random`, so seeding `random` also makes the
    # row order reproducible.
    df_transaksi_harian = df_transaksi_harian.sample(
        frac=1, random_state=random.randint(0, 2**32 - 1)
    ).reset_index(drop=True)

    print("--- Transaksi Harian Data (First 5 Rows) ---")
    print(df_transaksi_harian.head())
    print(f"\nGenerated {len(df_transaksi_harian)} transaksi harian records.")

## Export Grocery Data to Excel (`.xlsx`)

In [None]:
# Write the three generated tables into one Excel workbook, one sheet each.
# Empty tables are skipped; failures are reported instead of raised so the
# notebook keeps running.
excel_output_filename = "synthetic_grocery_data.xlsx"

try:
    # (sheet name, label used in the log messages, frame to write)
    sheets_to_export = [
        ("Cabang", "Cabang", df_cabang),
        ("Karyawan", "Karyawan", df_karyawan),
        ("Transaksi_Harian", "Transaksi Harian", df_transaksi_harian),
    ]

    if all(frame.empty for _, _, frame in sheets_to_export):
        # A workbook without any sheet cannot be saved by the openpyxl writer,
        # so do not open the writer at all in this case.
        print("All DataFrames are empty; no Excel file was written.")
    else:
        with pd.ExcelWriter(excel_output_filename, engine='openpyxl') as writer:
            for sheet_name, label, frame in sheets_to_export:
                if frame.empty:
                    print(f"{label} data is empty, not writing to Excel.")
                    continue
                frame.to_excel(writer, sheet_name=sheet_name, index=False)
                print(f"Sheet '{sheet_name}' written with {len(frame)} rows.")

        print(f"\nSuccessfully exported data to '{excel_output_filename}'")
except Exception as e:
    print(f"An error occurred during Excel export: {e}")

# Note: If you strictly need CSV files, you would do:
# df_cabang.to_csv('cabang_data.csv', index=False)
# df_karyawan.to_csv('karyawan_data.csv', index=False)
# df_transaksi_harian.to_csv('transaksi_harian_data.csv', index=False)
# print("\nIf you prefer CSV, three separate files would be created.")