In [1]:
import pandas as pd

# Ganti dengan nama file yang kamu pakai
csv_path = "data_imu_oke.csv"

try:
    df = pd.read_csv(csv_path)
    print(f"✅ CSV loaded successfully: {csv_path}")
    print("📌 Shape:", df.shape)
    print("🔍 Column names:", df.columns.tolist())
    print("🧪 Label distribution:\n", df['label'].value_counts())
except Exception as e:
    print(f"❌ Failed to load CSV: {e}")


✅ CSV loaded successfully: data_imu_oke.csv
📌 Shape: (1080, 7)
🔍 Column names: ['ax', 'ay', 'az', 'gx', 'gy', 'gz', 'label']
🧪 Label distribution:
 label
10    364
0     361
1     355
Name: count, dtype: int64


In [5]:
import pandas as pd

def expand_csv_proportionally(df, target_total_rows):
    """Perbesar dataset agar jumlah baris sesuai target, dengan proporsi label tetap."""
    # Hitung distribusi asli
    label_counts = df['label'].value_counts(normalize=True)

    # Hitung jumlah baris yang harus diambil dari masing-masing label
    new_counts = (label_counts * target_total_rows).astype(int)

    # Ambil sampel per label sesuai jumlah baru
    expanded_df = pd.concat([
        df[df['label'] == label].sample(n=count, replace=True)
        for label, count in new_counts.items()
    ], ignore_index=True)

    # Shuffle hasil akhir
    return expanded_df.sample(frac=1).reset_index(drop=True)

def export_to_csv(df, filename="expanded_proportional.csv"):
    df.to_csv(filename, index=False)
    print(f"✅ Dataset tersimpan di: {filename}")

# Contoh penggunaan
if __name__ == "__main__":
    df = pd.read_csv("data_imu_oke.csv")
    expanded_df = expand_csv_proportionally(df, target_total_rows=10000)
    export_to_csv(expanded_df)
    print("🧪 Label distribution:\n", expanded_df['label'].value_counts())

✅ Dataset tersimpan di: expanded_proportional.csv
🧪 Label distribution:
 label
10    3370
0     3342
1     3287
Name: count, dtype: int64
