In [3]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, roc_auc_score
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import plotly.express as px
import pandas as pd
import psutil
import tracemalloc
import psutil
import torch
import platform
import subprocess
import pyRAPL
import plotly.graph_objects as go
import timeit

In [4]:
# Load dataset
path = 'lung_mysarahmadbhat.csv'

df = pd.read_csv(path)

df.head()

Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER
0,M,69,1,2,2,1,1,2,1,2,2,2,2,2,2,YES
1,M,74,2,1,1,1,2,2,2,1,1,1,2,2,2,YES
2,F,59,1,1,1,2,1,2,1,2,1,2,2,1,2,NO
3,M,63,2,2,2,1,1,1,1,1,2,1,1,2,2,NO
4,F,63,1,2,1,1,1,1,1,2,1,2,2,1,1,NO


In [5]:
df.columns

Index(['GENDER', 'AGE', 'SMOKING', 'YELLOW_FINGERS', 'ANXIETY',
       'PEER_PRESSURE', 'CHRONIC DISEASE', 'FATIGUE ', 'ALLERGY ', 'WHEEZING',
       'ALCOHOL CONSUMING', 'COUGHING', 'SHORTNESS OF BREATH',
       'SWALLOWING DIFFICULTY', 'CHEST PAIN', 'LUNG_CANCER'],
      dtype='object')

In [6]:
print(df.dtypes)

GENDER                   object
AGE                       int64
SMOKING                   int64
YELLOW_FINGERS            int64
ANXIETY                   int64
PEER_PRESSURE             int64
CHRONIC DISEASE           int64
FATIGUE                   int64
ALLERGY                   int64
WHEEZING                  int64
ALCOHOL CONSUMING         int64
COUGHING                  int64
SHORTNESS OF BREATH       int64
SWALLOWING DIFFICULTY     int64
CHEST PAIN                int64
LUNG_CANCER              object
dtype: object


In [7]:
import plotly.express as px

# Membuat DataFrame tdf yang berisi counts dari variabel 'Level'
tdf = df['LUNG_CANCER'].value_counts().reset_index()
tdf.columns = ['LUNG_CANCER', 'count']  # Memberi nama kolom agar lebih mudah dipahami

# Menambahkan kolom persentase
total_count = tdf['count'].sum()
tdf['percentage'] = (tdf['count'] / total_count) * 100

# Membuat diagram batang interaktif menggunakan Plotly
fig = px.bar(tdf, x='LUNG_CANCER', y='count',
             title='Lung Cancer Distribution',
             labels={'LUNG_CANCER': 'Lung Cancer Category', 'count': 'Count'},
             color='LUNG_CANCER',  # Memberikan warna berbeda untuk setiap kategori
             color_discrete_sequence=px.colors.qualitative.Set1,  # Menentukan palet warna
             text=tdf.apply(lambda row: f"{int(row['count'])} ({row['percentage']:.1f}%)", axis=1))  # Menambahkan count dan persentase

# Menampilkan chart
fig.update_traces(textposition='outside',  # Menempatkan teks di luar batang
                  texttemplate='%{text}')  # Menampilkan nilai count dan persentase

# Menyesuaikan margin agar teks tidak tertutup
fig.update_layout(
    margin=dict(l=50, r=50, t=50, b=50),  # Memberikan ruang pada sisi kiri, kanan, atas, dan bawah
)

fig.show()


In [8]:
print(df.isnull().sum())

GENDER                   0
AGE                      0
SMOKING                  0
YELLOW_FINGERS           0
ANXIETY                  0
PEER_PRESSURE            0
CHRONIC DISEASE          0
FATIGUE                  0
ALLERGY                  0
WHEEZING                 0
ALCOHOL CONSUMING        0
COUGHING                 0
SHORTNESS OF BREATH      0
SWALLOWING DIFFICULTY    0
CHEST PAIN               0
LUNG_CANCER              0
dtype: int64


Tahap Preprocessing (Encoding tipe data)

In [9]:
# Mengubah kolom 'GENDER' menjadi numerik (F = 2, M = 1)
df['GENDER'] = df['GENDER'].map({'F': 2, 'M': 1})

# Mengubah kolom 'LUNG_CANCER' menjadi numerik (NO = 0, YES = 1)
df['LUNG_CANCER'] = df['LUNG_CANCER'].map({'NO': 0, 'YES': 1})

# Cek hasilnya
print(df[['GENDER', 'LUNG_CANCER']].head())

   GENDER  LUNG_CANCER
0       1            1
1       1            1
2       2            0
3       1            0
4       2            0


In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 309 entries, 0 to 308
Data columns (total 16 columns):
 #   Column                 Non-Null Count  Dtype
---  ------                 --------------  -----
 0   GENDER                 309 non-null    int64
 1   AGE                    309 non-null    int64
 2   SMOKING                309 non-null    int64
 3   YELLOW_FINGERS         309 non-null    int64
 4   ANXIETY                309 non-null    int64
 5   PEER_PRESSURE          309 non-null    int64
 6   CHRONIC DISEASE        309 non-null    int64
 7   FATIGUE                309 non-null    int64
 8   ALLERGY                309 non-null    int64
 9   WHEEZING               309 non-null    int64
 10  ALCOHOL CONSUMING      309 non-null    int64
 11  COUGHING               309 non-null    int64
 12  SHORTNESS OF BREATH    309 non-null    int64
 13  SWALLOWING DIFFICULTY  309 non-null    int64
 14  CHEST PAIN             309 non-null    int64
 15  LUNG_CANCER            309 non-null    i

In [11]:
# Pisahkan data
X = df.drop(columns=['LUNG_CANCER'])  # Semua kolom kecuali 'LUNG_CANCER'
y = df['LUNG_CANCER']

In [12]:
X.head()

Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN
0,1,69,1,2,2,1,1,2,1,2,2,2,2,2,2
1,1,74,2,1,1,1,2,2,2,1,1,1,2,2,2
2,2,59,1,1,1,2,1,2,1,2,1,2,2,1,2
3,1,63,2,2,2,1,1,1,1,1,2,1,1,2,2
4,2,63,1,2,1,1,1,1,1,2,1,2,2,1,1


Tahap Split Data (80,20)

In [13]:
# Membagi data menjadi train dan test (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Membagi data menjadi train dan test (80% train, 20% test), serta menggunakan Stratified ShuffleSplit pada kolom y
X_train_st, X_test_st, y_train_st, y_test_st = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Menampilkan ukuran data training dan testing
train_size = len(X_train)
test_size = len(X_test)

train_size_st = len(X_train_st)
test_size_st = len(X_test_st)

print(f"Train size: {train_size} data")
print(f"Test size: {test_size} data")

print(f"Train size Stratify: {train_size_st} data")
print(f"Test size Stratify: {test_size_st} data")

Train size: 247 data
Test size: 62 data
Train size Stratify: 247 data
Test size Stratify: 62 data


In [14]:

# Inisialisasi model Decision Tree
dtree = DecisionTreeClassifier(random_state=42)

# Latih model Decision Tree
dtree.fit(X_train, y_train)

# Prediksi dengan Decision Tree
dtree_pred = dtree.predict(X_test)

# Evaluasi model Decision Tree
dtree_accuracy = accuracy_score(y_test, dtree_pred)
dtree_report = classification_report(y_test, dtree_pred)
dtree_conf_matrix = confusion_matrix(y_test, dtree_pred)

# Menampilkan hasil evaluasi
print("Decision Tree Accuracy:", dtree_accuracy)
print("Decision Tree Classification Report:\n", dtree_report)
print("Confusion Matrix:\n", dtree_conf_matrix)

Decision Tree Accuracy: 0.967741935483871
Decision Tree Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.50      0.50         2
           1       0.98      0.98      0.98        60

    accuracy                           0.97        62
   macro avg       0.74      0.74      0.74        62
weighted avg       0.97      0.97      0.97        62

Confusion Matrix:
 [[ 1  1]
 [ 1 59]]


In [15]:
# Inisialisasi model Decision Tree
dtree_st = DecisionTreeClassifier(random_state=42)

# Latih model Decision Tree
dtree_st.fit(X_train_st, y_train_st)

# Prediksi dengan Decision Tree
dtree_pred_st = dtree_st.predict(X_test_st)

# Evaluasi model Decision Tree
dtree_accuracy_st = accuracy_score(y_test_st, dtree_pred_st)
dtree_report_st = classification_report(y_test_st, dtree_pred_st)
dtree_conf_matrix_st = confusion_matrix(y_test_st, dtree_pred_st)

# Menampilkan hasil evaluasi
print("Decision Tree Accuracy (ST):", dtree_accuracy_st)
print("Decision Tree Classification Repor (ST)t:\n", dtree_report_st)
print("Confusion Matrix (ST):\n", dtree_conf_matrix_st)


Decision Tree Accuracy (ST): 0.9193548387096774
Decision Tree Classification Repor (ST)t:
               precision    recall  f1-score   support

           0       0.64      0.88      0.74         8
           1       0.98      0.93      0.95        54

    accuracy                           0.92        62
   macro avg       0.81      0.90      0.84        62
weighted avg       0.94      0.92      0.92        62

Confusion Matrix (ST):
 [[ 7  1]
 [ 4 50]]


In [16]:
# Inisialisasi model Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Latih model Random Forest
rf.fit(X_train, y_train)

# Prediksi dengan Random Forest
rf_pred = rf.predict(X_test)

# Evaluasi model Random Forest
rf_accuracy = accuracy_score(y_test, rf_pred)
rf_report = classification_report(y_test, rf_pred)
rf_conf_matrix = confusion_matrix(y_test, rf_pred)

# Menampilkan hasil evaluasi
print("Random Forest Accuracy:", rf_accuracy)
print("Random Forest Classification Report:\n", rf_report)
print("Confusion Matrix:\n", rf_conf_matrix)

Random Forest Accuracy: 0.967741935483871
Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.50      0.50         2
           1       0.98      0.98      0.98        60

    accuracy                           0.97        62
   macro avg       0.74      0.74      0.74        62
weighted avg       0.97      0.97      0.97        62

Confusion Matrix:
 [[ 1  1]
 [ 1 59]]


In [17]:
# Inisialisasi model Random Forest
rf_st = RandomForestClassifier(n_estimators=100, random_state=42)

# Latih model Random Forest
rf_st.fit(X_train_st, y_train_st)

# Prediksi dengan Random Forest
rf_pred_st = rf_st.predict(X_test_st)

# Evaluasi model Random Forest
rf_accuracy_st = accuracy_score(y_test_st, rf_pred_st)
rf_report_st = classification_report(y_test_st, rf_pred_st)
rf_conf_matrix_st = confusion_matrix(y_test_st, rf_pred_st)

# Menampilkan hasil evaluasi
print("Random Forest Accuracy (ST):", rf_accuracy_st)
print("Random Forest Classification Report (ST):\n", rf_report_st)
print("Confusion Matrix (ST):\n", rf_conf_matrix_st)

Random Forest Accuracy (ST): 0.9193548387096774
Random Forest Classification Report (ST):
               precision    recall  f1-score   support

           0       0.67      0.75      0.71         8
           1       0.96      0.94      0.95        54

    accuracy                           0.92        62
   macro avg       0.81      0.85      0.83        62
weighted avg       0.92      0.92      0.92        62

Confusion Matrix (ST):
 [[ 6  2]
 [ 3 51]]


In [18]:
# 1. Predict probabilities untuk ROC Curve
y_probs_dtree = dtree.predict_proba(X_test)[:, 1]  
y_probs_rf = rf.predict_proba(X_test)[:, 1]  

y_probs_dtree_st = dtree_st.predict_proba(X_test_st)[:, 1]  
y_probs_rf_st = rf_st.predict_proba(X_test_st)[:, 1]  

# 2. Hitung AUC untuk semua model
auc_dtree = roc_auc_score(y_test, y_probs_dtree)
auc_rf = roc_auc_score(y_test, y_probs_rf)
auc_dtree_st = roc_auc_score(y_test_st, y_probs_dtree_st)
auc_rf_st = roc_auc_score(y_test_st, y_probs_rf_st)

# 3. Hitung ROC Curve (FPR & TPR)
fpr_dtree, tpr_dtree, _ = roc_curve(y_test, y_probs_dtree)
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_probs_rf)
fpr_dtree_st, tpr_dtree_st, _ = roc_curve(y_test_st, y_probs_dtree_st)
fpr_rf_st, tpr_rf_st, _ = roc_curve(y_test_st, y_probs_rf_st)

# 4. Membuat ROC Curve dengan Plotly
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=fpr_dtree, y=tpr_dtree, mode='lines',
    name=f'Decision Tree (AUC = {auc_dtree:.4f})',
    line=dict(color='#1f77b4', dash='dash')  # Biru Soft
))

fig.add_trace(go.Scatter(
    x=fpr_rf, y=tpr_rf, mode='lines',
    name=f'Random Forest (AUC = {auc_rf:.4f})',
    line=dict(color='red', dash='dot')  # Oranye Soft
))

fig.add_trace(go.Scatter(
    x=fpr_dtree_st, y=tpr_dtree_st, mode='lines',
    name=f'Decision Tree (ST) (AUC = {auc_dtree_st:.4f})',
    line=dict(color='#2ca02c', dash='solid')  # Hijau Soft
))

fig.add_trace(go.Scatter(
    x=fpr_rf_st, y=tpr_rf_st, mode='lines',
    name=f'Random Forest (ST) (AUC = {auc_rf_st:.4f})',
    line=dict(color='blue', dash='longdash')  # Merah Soft
))

# Garis referensi (Random Guess)
fig.add_trace(go.Scatter(
    x=[0, 1], y=[0, 1], mode='lines',
    name='Random', line=dict(color='#7f7f7f', dash='dashdot')  # Abu-abu Netral
))

fig.update_layout(
    title="ROC Curve - Decision Tree & Random Forest (Stratified & Non-Stratified)",
    xaxis_title="False Positive Rate",
    yaxis_title="True Positive Rate",
    showlegend=True,
    font=dict(size=14),  # Ukuran teks lebih nyaman
    legend=dict(
        bgcolor="rgba(255, 255, 255, 0.8)",  # Latar belakang legend (semi-transparan)
        bordercolor="black",  # Warna border hitam
        borderwidth=1  # Ketebalan border
    ),
)

# Menampilkan plot
fig.show()

In [19]:
# Informasi CPU
cpu_model = subprocess.run(["cat", "/proc/cpuinfo"], capture_output=True, text=True).stdout
cpu_name = [line for line in cpu_model.split("\n") if "model name" in line]
cpu_name = cpu_name[0].split(":")[1].strip() if cpu_name else "Unknown"

cpu_architecture = platform.machine()
cpu_cores = psutil.cpu_count(logical=False)
cpu_threads = psutil.cpu_count(logical=True)
cpu_freq = psutil.cpu_freq().max if psutil.cpu_freq() else "Unknown"

# Informasi Cache CPU
cache_info = subprocess.run(["lscpu"], capture_output=True, text=True).stdout
l1_instruction_cache, l1_data_cache, l2_cache, l3_cache = "Unknown", "Unknown", "Unknown", "Unknown"

for line in cache_info.split("\n"):
    if "L1d cache" in line:
        l1_data_cache = line.split(":")[1].strip()
    elif "L1i cache" in line:
        l1_instruction_cache = line.split(":")[1].strip()
    elif "L2 cache" in line:
        l2_cache = line.split(":")[1].strip()
    elif "L3 cache" in line:
        l3_cache = line.split(":")[1].strip()

# Informasi RAM
ram = psutil.virtual_memory()
total_ram = round(ram.total / (1024**3), 2)  # Konversi ke GB

# Informasi Disk
disk = psutil.disk_usage('/')
total_disk = round(disk.total / (1024**3), 2)

# Informasi GPU (jika tersedia)
gpu_name = "None"
gpu_memory = "N/A"
gpu_info = "No GPU detected."

if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
    total_memory = torch.cuda.get_device_properties(0).total_memory
    gpu_memory = f"{round(total_memory / (1024**3), 2)} GB"

    # Gunakan nvidia-smi untuk detail tambahan
    nvidia_info = subprocess.run(["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv"], 
                                 capture_output=True, text=True).stdout
    gpu_info = nvidia_info if nvidia_info else gpu_name

# Output
print("=== System Information ===")
print(f"CPU Model        : {cpu_name}")
print(f"CPU Architecture : {cpu_architecture}")
print(f"CPU Cores        : {cpu_cores} (Physical), {cpu_threads} (Logical)")
print(f"CPU Max Frequency: {cpu_freq} MHz")
print(f"Total RAM        : {total_ram} GB")
print(f"Total Disk       : {total_disk} GB")
print(f"GPU Model        : {gpu_name} ({gpu_memory})")

print("\n=== CPU Cache Information ===")
print(f"L1 Instruction Cache: {l1_instruction_cache}")
print(f"L1 Data Cache      : {l1_data_cache}")
print(f"L2 Cache           : {l2_cache}")
print(f"L3 Cache           : {l3_cache}")

if torch.cuda.is_available():
    print("\n=== GPU Details ===")
    print(gpu_info)
else:
    print("\nNo GPU detected.")

=== System Information ===
CPU Model        : 13th Gen Intel(R) Core(TM) i7-13700H
CPU Architecture : x86_64
CPU Cores        : 14 (Physical), 20 (Logical)
CPU Max Frequency: 4400.0 MHz
Total RAM        : 15.34 GB
Total Disk       : 20.74 GB
GPU Model        : None (N/A)

=== CPU Cache Information ===
L1 Instruction Cache: 704 KiB (14 instances)
L1 Data Cache      : 544 KiB (14 instances)
L2 Cache           : 11.5 MiB (8 instances)
L3 Cache           : 24 MiB (1 instance)

No GPU detected.


In [20]:
# mengukur waktu eksekusi DT
execution_times_dt = [timeit.timeit('dtree.fit(X_train, y_train)', globals=globals(), number=1) for _ in range(100)]

# Ambil nilai terkecil dan terbesar
min_time_dt = min(execution_times_dt)
max_time_dt = max(execution_times_dt)

# Tampilkan hasil
print(f"Execution Time (Min): {min_time_dt:.4f} seconds")
print(f"Execution Time (Max): {max_time_dt:.4f} seconds")

Execution Time (Min): 0.0020 seconds
Execution Time (Max): 0.0130 seconds


In [21]:
# mengukur waktu eksekusi DT Stratified
execution_times_dt_st = [timeit.timeit('dtree_st.fit(X_train_st, y_train_st)', globals=globals(), number=1) for _ in range(100)]

# Ambil nilai terkecil dan terbesar
min_time_dt_st = min(execution_times_dt_st)
max_time_dt_st = max(execution_times_dt_st)

# Tampilkan hasil
print(f"Execution Time (Min): {min_time_dt_st:.4f} seconds")
print(f"Execution Time (Max): {max_time_dt_st:.4f} seconds")

Execution Time (Min): 0.0019 seconds
Execution Time (Max): 0.0054 seconds


In [22]:
# mengukur waktu eksekusi RF
execution_times_rf = [timeit.timeit('rf.fit(X_train, y_train)', globals=globals(), number=1) for _ in range(100)]

# Ambil nilai terkecil dan terbesar
min_time_rf = min(execution_times_rf)
max_time_rf = max(execution_times_rf)

# Tampilkan hasil
print(f"Execution Time (Min): {min_time_rf:.4f} seconds")
print(f"Execution Time (Max): {max_time_rf:.4f} seconds")

Execution Time (Min): 0.1126 seconds
Execution Time (Max): 0.1850 seconds


In [23]:
# mengukur waktu eksekusi RF Stratified
execution_times_rf_st = [timeit.timeit('rf_st.fit(X_train_st, y_train_st)', globals=globals(), number=1) for _ in range(100)]

# Ambil nilai terkecil dan terbesar
min_time_rf_st = min(execution_times_rf_st)
max_time_rf_st = max(execution_times_rf_st)

# Tampilkan hasil
print(f"Execution Time (Min): {min_time_rf_st:.4f} seconds")
print(f"Execution Time (Max): {max_time_rf_st:.4f} seconds")

Execution Time (Min): 0.1110 seconds
Execution Time (Max): 0.1565 seconds


In [24]:
# Fungsi untuk mendapatkan penggunaan RAM awal dan akhir dari tracemalloc
def process_memory():
    snapshot, _ = tracemalloc.get_traced_memory()
    return snapshot / (1024 * 1024)  # Konversi ke MB

In [25]:
def measure_memory(model, x_train, y_train):
    tracemalloc.start()
    initial_memory = process_memory()  # Ambil memori awal
    model.fit(x_train, y_train)  # Latih model
    final_memory = process_memory()  # Ambil memori akhir
    tracemalloc.stop()
    return final_memory - initial_memory  # Selisih pemakaian memori

In [26]:
# Memory usage DT
memory_usages_dt = [measure_memory(dtree, X_train, y_train) for _ in range(100)]

# Ambil nilai terkecil dan terbesar
min_memory_dt, max_memory_dt = min(memory_usages_dt), max(memory_usages_dt)

# Tampilkan hasil
print(f"Memory Usage (Min): {min_memory_dt:.4f} MB")
print(f"Memory Usage (Max): {max_memory_dt:.4f} MB")

Memory Usage (Min): 0.0010 MB
Memory Usage (Max): 0.0029 MB


In [27]:
# Memory usage DT Stratified
memory_usages_dt_st = [measure_memory(dtree_st, X_train_st, y_train_st) for _ in range(100)]

# Ambil nilai terkecil dan terbesar
min_memory_dt_st, max_memory_dt_st = min(memory_usages_dt_st), max(memory_usages_dt_st)

# Tampilkan hasil
print(f"Memory Usage (Min): {min_memory_dt_st:.4f} MB")
print(f"Memory Usage (Max): {max_memory_dt_st:.4f} MB")

Memory Usage (Min): 0.0010 MB
Memory Usage (Max): 0.0072 MB


In [28]:
# Memory usage RF
memory_usages_rf = [measure_memory(rf, X_train, y_train) for _ in range(100)]

# Ambil nilai terkecil dan terbesar
min_memory_rf, max_memory_rf = min(memory_usages_rf), max(memory_usages_rf)

# Tampilkan hasil
print(f"Memory Usage (Min): {min_memory_rf:.4f} MB")
print(f"Memory Usage (Max): {max_memory_rf:.4f} MB")

Memory Usage (Min): 0.0665 MB
Memory Usage (Max): 0.0707 MB


In [29]:
# Memory usage RF Stratified
memory_usages_rf_st = [measure_memory(rf_st, X_train_st, y_train_st) for _ in range(100)]

# Ambil nilai terkecil dan terbesar
min_memory_rf_st, max_memory_rf_st = min(memory_usages_rf_st), max(memory_usages_rf_st)

# Tampilkan hasil
print(f"Memory Usage (Min): {min_memory_rf_st:.4f} MB")
print(f"Memory Usage (Max): {max_memory_rf_st:.4f} MB")

Memory Usage (Min): 0.0653 MB
Memory Usage (Max): 0.0701 MB


In [30]:
def measure_cpu_energy_usage(label, model, X_train, y_train):
    pyRAPL.setup()
    
    cpu_energies = []

    for _ in range(100):
        meter = pyRAPL.Measurement(label)
        
        with meter:
            model.fit(X_train, y_train)  # Latih model
            
        # Ambil hasil pengukuran
        result = meter.result

        # Konversi CPU Energy dari µJ ke J
        cpu_energy_joule = result.pkg[0] * 1e-6  # Paket energi CPU

        # Simpan nilai untuk mencari min & max
        cpu_energies.append(cpu_energy_joule)

    return min(cpu_energies), max(cpu_energies)

In [31]:
# Measure CPU energy DT
min_cpu_dt, max_cpu_dt = measure_cpu_energy_usage('dtree', dtree, X_train, y_train)

# Tampilkan hasil
print(f"CPU Energy (Min): {min_cpu_dt:.4f} J")
print(f"CPU Energy (Max): {max_cpu_dt:.4f} J")

Label : dtree
Begin : Tue Feb 18 20:34:49 2025
Duration :  2564.1240 us
-------------------------------
PKG :
	socket 0 :  41138.0000 uJ
Label : dtree
Begin : Tue Feb 18 20:34:49 2025
Duration :  2274.2190 us
-------------------------------
PKG :
	socket 0 :  47424.0000 uJ
Label : dtree
Begin : Tue Feb 18 20:34:49 2025
Duration :  2115.7290 us
-------------------------------
PKG :
	socket 0 :  25146.0000 uJ
Label : dtree
Begin : Tue Feb 18 20:34:49 2025
Duration :  2093.2780 us
-------------------------------
PKG :
	socket 0 :  32166.0000 uJ
Label : dtree
Begin : Tue Feb 18 20:34:49 2025
Duration :  1999.9770 us
-------------------------------
PKG :
	socket 0 :  34546.0000 uJ
Label : dtree
Begin : Tue Feb 18 20:34:49 2025
Duration :  2103.3970 us
-------------------------------
PKG :
	socket 0 :  19714.0000 uJ
Label : dtree
Begin : Tue Feb 18 20:34:49 2025
Duration :  2093.2840 us
-------------------------------
PKG :
	socket 0 :  20508.0000 uJ
Label : dtree
Begin : Tue Feb 18 20:34:49

In [32]:
# Measure CPU energy DT Stratified
min_cpu_dt_st, max_cpu_dt_st = measure_cpu_energy_usage('dtree_stratified', dtree_st, X_train_st, y_train_st)

# Tampilkan hasil
print(f"CPU Energy (Min): {min_cpu_dt_st:.4f} J")
print(f"CPU Energy (Max): {max_cpu_dt_st:.4f} J")

Label : dtree_stratified
Begin : Tue Feb 18 20:34:49 2025
Duration :  2469.4830 us
-------------------------------
PKG :
	socket 0 :  47120.0000 uJ
Label : dtree_stratified
Begin : Tue Feb 18 20:34:49 2025
Duration :  3328.2230 us
-------------------------------
PKG :
	socket 0 :  73791.0000 uJ
Label : dtree_stratified
Begin : Tue Feb 18 20:34:49 2025
Duration :  4817.8260 us
-------------------------------
PKG :
	socket 0 :  60852.0000 uJ
Label : dtree_stratified
Begin : Tue Feb 18 20:34:49 2025
Duration :  2204.0410 us
-------------------------------
PKG :
	socket 0 :  41992.0000 uJ
Label : dtree_stratified
Begin : Tue Feb 18 20:34:49 2025
Duration :  2103.4890 us
-------------------------------
PKG :
	socket 0 :  46142.0000 uJ
Label : dtree_stratified
Begin : Tue Feb 18 20:34:49 2025
Duration :  2028.1300 us
-------------------------------
PKG :
	socket 0 :  30640.0000 uJ
Label : dtree_stratified
Begin : Tue Feb 18 20:34:49 2025
Duration :  2029.3430 us
-----------------------------

In [33]:
# Measure CPU energy RF
min_cpu_rf, max_cpu_rf = measure_cpu_energy_usage('random_forest', rf, X_train, y_train)

# Tampilkan hasil
print(f"CPU Energy (Min): {min_cpu_rf:.4f} J")
print(f"CPU Energy (Max): {max_cpu_rf:.4f} J")

Label : random_forest
Begin : Tue Feb 18 20:34:50 2025
Duration : 121396.9480 us
-------------------------------
PKG :
	socket 0 :  1718684.0000 uJ
Label : random_forest
Begin : Tue Feb 18 20:34:50 2025
Duration : 119429.3410 us
-------------------------------
PKG :
	socket 0 :  1627010.0000 uJ
Label : random_forest
Begin : Tue Feb 18 20:34:50 2025
Duration : 119073.1810 us
-------------------------------
PKG :
	socket 0 :  1530575.0000 uJ
Label : random_forest
Begin : Tue Feb 18 20:34:50 2025
Duration : 120566.6310 us
-------------------------------
PKG :
	socket 0 :  1549252.0000 uJ
Label : random_forest
Begin : Tue Feb 18 20:34:50 2025
Duration : 118673.6160 us
-------------------------------
PKG :
	socket 0 :  1604183.0000 uJ
Label : random_forest
Begin : Tue Feb 18 20:34:50 2025
Duration : 119333.4790 us
-------------------------------
PKG :
	socket 0 :  1581295.0000 uJ
Label : random_forest
Begin : Tue Feb 18 20:34:50 2025
Duration : 120304.3750 us
-------------------------------

In [34]:
# Measure CPU energy RF Stratified
min_cpu_rf_st, max_cpu_rf_st = measure_cpu_energy_usage('random_forest_stratified', rf_st, X_train_st, y_train_st)

# Tampilkan hasil
print(f"CPU Energy (Min): {min_cpu_rf_st:.4f} J")
print(f"CPU Energy (Max): {max_cpu_rf_st:.4f} J")

Label : random_forest_stratified
Begin : Tue Feb 18 20:35:02 2025
Duration : 143370.6450 us
-------------------------------
PKG :
	socket 0 :  1940059.0000 uJ
Label : random_forest_stratified
Begin : Tue Feb 18 20:35:02 2025
Duration : 125939.0260 us
-------------------------------
PKG :
	socket 0 :  1604488.0000 uJ
Label : random_forest_stratified
Begin : Tue Feb 18 20:35:03 2025
Duration : 132750.7100 us
-------------------------------
PKG :
	socket 0 :  1848872.0000 uJ
Label : random_forest_stratified
Begin : Tue Feb 18 20:35:03 2025
Duration : 128154.0050 us
-------------------------------
PKG :
	socket 0 :  1634395.0000 uJ
Label : random_forest_stratified
Begin : Tue Feb 18 20:35:03 2025
Duration : 169559.1580 us
-------------------------------
PKG :
	socket 0 :  1940242.0000 uJ
Label : random_forest_stratified
Begin : Tue Feb 18 20:35:03 2025
Duration : 117614.0080 us
-------------------------------
PKG :
	socket 0 :  1523861.0000 uJ
Label : random_forest_stratified
Begin : Tue F