In [37]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix, roc_curve, roc_auc_score
import seaborn as sns
from sklearn.calibration import cross_val_predict
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import plotly.express as px
import pandas as pd
import psutil
import tracemalloc
import numpy as np
import torch
import platform
import subprocess
import pyRAPL
import plotly.graph_objects as go
import timeit

In [9]:
# Read the survey CSV (path is relative to the notebook's working directory)
path = 'lcs.csv'
df = pd.read_csv(filepath_or_buffer=path)

# Preview the first five rows
df.head(n=5)

Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER
0,M,69,1,2,2,1,1,2,1,2,2,2,2,2,2,YES
1,M,74,2,1,1,1,2,2,2,1,1,1,2,2,2,YES
2,F,59,1,1,1,2,1,2,1,2,1,2,2,1,2,NO
3,M,63,2,2,2,1,1,1,1,1,2,1,1,2,2,NO
4,F,63,1,2,1,1,1,1,1,2,1,2,2,1,1,NO


In [26]:
# Dataset dimensions as a (rows, columns) tuple
df.shape

(1157, 16)

In [3]:
# Column labels; note that the output below shows 'FATIGUE ' and 'ALLERGY ' with a trailing space
df.columns

Index(['GENDER', 'AGE', 'SMOKING', 'YELLOW_FINGERS', 'ANXIETY',
       'PEER_PRESSURE', 'CHRONIC DISEASE', 'FATIGUE ', 'ALLERGY ', 'WHEEZING',
       'ALCOHOL CONSUMING', 'COUGHING', 'SHORTNESS OF BREATH',
       'SWALLOWING DIFFICULTY', 'CHEST PAIN', 'LUNG_CANCER'],
      dtype='object')

In [4]:
# Per-column dtypes; GENDER and LUNG_CANCER are still text (object) at this point
print(df.dtypes)

GENDER                   object
AGE                       int64
SMOKING                   int64
YELLOW_FINGERS            int64
ANXIETY                   int64
PEER_PRESSURE             int64
CHRONIC DISEASE           int64
FATIGUE                   int64
ALLERGY                   int64
WHEEZING                  int64
ALCOHOL CONSUMING         int64
COUGHING                  int64
SHORTNESS OF BREATH       int64
SWALLOWING DIFFICULTY     int64
CHEST PAIN                int64
LUNG_CANCER              object
dtype: object


In [5]:
import plotly.express as px

# Frequency table of the target column 'LUNG_CANCER'
tdf = df['LUNG_CANCER'].value_counts().reset_index()
tdf.columns = ['LUNG_CANCER', 'count']  # explicit, readable column names

# Share of each class as a percentage of all rows
total_count = tdf['count'].sum()
tdf['percentage'] = tdf['count'].div(total_count).mul(100)

# Interactive bar chart; each bar is annotated with "<count> (<percentage>%)"
fig = px.bar(
    data_frame=tdf,
    x='LUNG_CANCER',
    y='count',
    color='LUNG_CANCER',  # one colour per class
    color_discrete_sequence=px.colors.qualitative.Set1,
    title='Lung Cancer Distribution',
    labels={'LUNG_CANCER': 'Lung Cancer Category', 'count': 'Count'},
    text=[
        f"{int(n)} ({pct:.1f}%)"
        for n, pct in zip(tdf['count'], tdf['percentage'])
    ],
)

# Draw the annotation above each bar, using the text supplied above
fig.update_traces(texttemplate='%{text}', textposition='outside')

# Leave room on every side so the outside labels are not clipped
fig.update_layout(margin={'l': 50, 'r': 50, 't': 50, 'b': 50})

fig.show()


In [10]:
# Count missing values per column
print(df.isnull().sum())

GENDER                   0
AGE                      0
SMOKING                  0
YELLOW_FINGERS           0
ANXIETY                  0
PEER_PRESSURE            0
CHRONIC DISEASE          0
FATIGUE                  0
ALLERGY                  0
WHEEZING                 0
ALCOHOL CONSUMING        0
COUGHING                 0
SHORTNESS OF BREATH      0
SWALLOWING DIFFICULTY    0
CHEST PAIN               0
LUNG_CANCER              0
dtype: int64


Tahap Preprocessing (Encoding tipe data)

In [11]:
# Encode the two text columns as integers.
# The mapping is applied only while a column is still non-numeric: calling
# .map() on already-encoded values matches none of the keys and would replace
# every entry with NaN, which is what used to happen when this cell was re-run.

# GENDER: F -> 0, M -> 1
if not pd.api.types.is_numeric_dtype(df['GENDER']):
    df['GENDER'] = df['GENDER'].map({'F': 0, 'M': 1})

# LUNG_CANCER: NO -> 0, YES -> 1
if not pd.api.types.is_numeric_dtype(df['LUNG_CANCER']):
    df['LUNG_CANCER'] = df['LUNG_CANCER'].map({'NO': 0, 'YES': 1})

# Labels outside the mappings become NaN; fail here instead of later during model fitting
assert df[['GENDER', 'LUNG_CANCER']].notna().all().all(), \
    "GENDER/LUNG_CANCER contain unmapped values; reload the CSV and check the labels"

# Check the result
print(df[['GENDER', 'LUNG_CANCER']].head())

   GENDER  LUNG_CANCER
0       1            1
1       1            1
2       0            0
3       1            0
4       0            0


In [12]:
# Confirm dtypes and non-null counts after encoding
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1157 entries, 0 to 1156
Data columns (total 16 columns):
 #   Column                 Non-Null Count  Dtype
---  ------                 --------------  -----
 0   GENDER                 1157 non-null   int64
 1   AGE                    1157 non-null   int64
 2   SMOKING                1157 non-null   int64
 3   YELLOW_FINGERS         1157 non-null   int64
 4   ANXIETY                1157 non-null   int64
 5   PEER_PRESSURE          1157 non-null   int64
 6   CHRONIC DISEASE        1157 non-null   int64
 7   FATIGUE                1157 non-null   int64
 8   ALLERGY                1157 non-null   int64
 9   WHEEZING               1157 non-null   int64
 10  ALCOHOL CONSUMING      1157 non-null   int64
 11  COUGHING               1157 non-null   int64
 12  SHORTNESS OF BREATH    1157 non-null   int64
 13  SWALLOWING DIFFICULTY  1157 non-null   int64
 14  CHEST PAIN             1157 non-null   int64
 15  LUNG_CANCER            1157 non-null  

In [13]:
# Separate the features from the target
X = df.drop('LUNG_CANCER', axis='columns')  # every column except 'LUNG_CANCER'
y = df.loc[:, 'LUNG_CANCER']

In [14]:
# Preview the feature matrix (target column removed)
X.head()

Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN
0,1,69,1,2,2,1,1,2,1,2,2,2,2,2,2
1,1,74,2,1,1,1,2,2,2,1,1,1,2,2,2
2,0,59,1,1,1,2,1,2,1,2,1,2,2,1,2
3,1,63,2,2,2,1,1,1,1,1,2,1,1,2,2
4,0,63,1,2,1,1,1,1,1,2,1,2,2,1,1


Tahap Split Data (80% train, 20% test)

In [28]:
# This cell must run: the timing, memory and CPU-energy cells further down
# read X_train / y_train and X_train_st / y_train_st.

# Split the data into train and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Same 80/20 split, but stratified on y
X_train_st, X_test_st, y_train_st, y_test_st = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Sizes of the training and test sets
train_size = len(X_train)
test_size = len(X_test)

train_size_st = len(X_train_st)
test_size_st = len(X_test_st)

print(f"Train size: {train_size} data")
print(f"Test size: {test_size} data")

print(f"Train size Stratify: {train_size_st} data")
print(f"Test size Stratify: {test_size_st} data")

In [29]:
# # Data distribusi tanpa stratify
# train_counts = y_train.value_counts()
# test_counts = y_test.value_counts()

# # Data distribusi dengan stratify
# train_st_counts = y_train_st.value_counts()
# test_st_counts = y_test_st.value_counts()

# # Gabung data jadi satu DataFrame
# data_combined = pd.DataFrame({
#     'Lung Cancer Category': list(train_counts.index) * 4,
#     'Proportion': list(train_counts.values) + list(test_counts.values) + list(train_st_counts.values) + list(test_st_counts.values),
#     'Dataset': ['Train'] * len(train_counts) + ['Test'] * len(test_counts) +
#                ['Train (Stratify)'] * len(train_st_counts) + ['Test (Stratify)'] * len(test_st_counts),
#     'Stratify': ['No'] * (len(train_counts) + len(test_counts)) + ['Yes'] * (len(train_st_counts) + len(test_st_counts))
# })

# # Buat plot
# fig = px.bar(
#     data_combined,
#     x='Lung Cancer Category',
#     y='Proportion',
#     color='Dataset',
#     barmode='group',
#     facet_col='Stratify',
#     color_discrete_sequence=px.colors.qualitative.Set1,
#     text='Proportion',
# )

# fig.update_traces(texttemplate='%{text:.4f}', textposition='outside')
# fig.update_layout(
#     title='Label Distribution: Stratified vs Non-Stratified',
#     yaxis_title='Proportion',
#     xaxis_title='Lung Cancer Category',
#     legend_title='Dataset',
#     height=500,
#     width=1000,
#     xaxis=dict(
#         type='category',
#         tickmode='array',
#         tickvals=[0, 1],
#         ticktext=['0', '1'],
#     )
# )

# fig.show()

In [None]:
# Metrics
def print_metrics(y_true, y_pred, label):
    """Print precision, recall and F1 for one set of predictions on a single line.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels, aligned with ``y_true``.
        label: text placed at the start of the printed line.

    All three scores are computed with ``average='binary'`` and printed with
    two decimals. Nothing is returned.
    """
    scorers = (precision_score, recall_score, f1_score)
    precision, recall, f1 = (
        scorer(y_true, y_pred, average='binary') for scorer in scorers
    )
    print(f"{label} - Precision: {precision:.2f}, Recall: {recall:.2f}, F1-Score: {f1:.2f}")

In [69]:
# Decision tree with a fixed seed
dtree = DecisionTreeClassifier(random_state=42)

# Two 10-fold splitters with the same shuffle seed: plain and class-stratified
kf_dtree = KFold(n_splits=10, shuffle=True, random_state=42)
skf_dtree = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# K-Fold: per-fold scores and out-of-fold predictions
scores_kf_dtree = cross_val_score(dtree, X, y, cv=kf_dtree)
y_pred_kf_dtree = cross_val_predict(dtree, X, y, cv=kf_dtree)

# Stratified K-Fold: per-fold scores and out-of-fold predictions
scores_skf_dtree = cross_val_score(dtree, X, y, cv=skf_dtree)
y_pred_skf_dtree = cross_val_predict(dtree, X, y, cv=skf_dtree)

# Report: mean score, spread across folds, then precision/recall/F1
print(f"K-Fold mean accuracy: {scores_kf_dtree.mean():.2f}")
print("Standar deviasi:", scores_kf_dtree.std())
print_metrics(y, y_pred_kf_dtree, "K-Fold")

print(f"\nStratified K-Fold mean accuracy: {scores_skf_dtree.mean():.2f}")
print("Standar deviasi:", scores_skf_dtree.std())
print_metrics(y, y_pred_skf_dtree, "Stratified K-Fold")

K-Fold mean accuracy: 0.93
Standar deviasi: 0.02094840831018713
K-Fold - Precision: 0.97, Recall: 0.88, F1-Score: 0.92

Stratified K-Fold mean accuracy: 0.92
Standar deviasi: 0.01731595240902494
Stratified K-Fold - Precision: 0.96, Recall: 0.86, F1-Score: 0.91


In [70]:
# Out-of-fold predicted probabilities for both splitters; column 1 is kept
y_proba_kf_dtree = cross_val_predict(
    dtree, X, y, cv=kf_dtree, method='predict_proba'
)[:, 1]
y_proba_skf_dtree = cross_val_predict(
    dtree, X, y, cv=skf_dtree, method='predict_proba'
)[:, 1]

# ROC AUC of those probabilities against the true labels
roc_auc_dtree_kf = roc_auc_score(y, y_proba_kf_dtree)
roc_auc_dtree_skf = roc_auc_score(y, y_proba_skf_dtree)

print(f"ROC AUC Decision Tree + K-Fold: {roc_auc_dtree_kf:.2f}")
print(f"ROC AUC DEcision Tree + Stratified K-Fold: {roc_auc_dtree_skf:.2f}")

ROC AUC Decision Tree + K-Fold: 0.91
ROC AUC DEcision Tree + Stratified K-Fold: 0.89


In [72]:
# Random forest of 100 trees with a fixed seed
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Two 10-fold splitters with the same shuffle seed: plain and class-stratified
kf_rf = KFold(n_splits=10, shuffle=True, random_state=42)
skf_rf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# K-Fold: per-fold scores and out-of-fold predictions
scores_kf_rf = cross_val_score(rf, X, y, cv=kf_rf)
y_pred_kf_rf = cross_val_predict(rf, X, y, cv=kf_rf)

# Stratified K-Fold: per-fold scores and out-of-fold predictions
scores_skf_rf = cross_val_score(rf, X, y, cv=skf_rf)
y_pred_skf_rf = cross_val_predict(rf, X, y, cv=skf_rf)

# Report: mean score, spread across folds, then precision/recall/F1
print(f"K-Fold mean accuracy: {scores_kf_rf.mean():.2f}")
print("Standar deviasi:", scores_kf_rf.std())
print_metrics(y, y_pred_kf_rf, "K-Fold")

print(f"\nStratified K-Fold mean accuracy: {scores_skf_rf.mean():.2f}")
print("Standar deviasi:", scores_skf_rf.std())
print_metrics(y, y_pred_skf_rf, "Stratified K-Fold")

K-Fold mean accuracy: 0.93
Standar deviasi: 0.02091740266217186
K-Fold - Precision: 0.96, Recall: 0.89, F1-Score: 0.92

Stratified K-Fold mean accuracy: 0.93
Standar deviasi: 0.014331494624214916
Stratified K-Fold - Precision: 0.96, Recall: 0.88, F1-Score: 0.92


In [66]:
# Out-of-fold predicted probabilities for both splitters; column 1 is kept
y_proba_kf_rf = cross_val_predict(
    rf, X, y, cv=kf_rf, method='predict_proba'
)[:, 1]
y_proba_skf_rf = cross_val_predict(
    rf, X, y, cv=skf_rf, method='predict_proba'
)[:, 1]

# ROC AUC of those probabilities against the true labels
roc_auc_rf_kf = roc_auc_score(y, y_proba_kf_rf)
roc_auc_rf_skf = roc_auc_score(y, y_proba_skf_rf)

print(f"ROC AUC Random Forest + K-Fold: {roc_auc_rf_kf:.2f}")
print(f"ROC AUC Random Forest + Stratified K-Fold: {roc_auc_rf_skf:.2f}")

ROC AUC Random Forest + K-Fold: 0.94
ROC AUC Random Forest + Stratified K-Fold: 0.94


In [None]:
# # 1. Predict probabilities untuk ROC Curve
# y_probs_dtree = dtree.predict_proba(X_test)[:, 1]  
# y_probs_rf = rf.predict_proba(X_test)[:, 1]  

# y_probs_dtree_st = dtree_st.predict_proba(X_test_st)[:, 1]  
# y_probs_rf_st = rf_st.predict_proba(X_test_st)[:, 1]  

# # 2. Hitung AUC untuk semua model
# auc_dtree = roc_auc_score(y_test, y_probs_dtree)
# auc_rf = roc_auc_score(y_test, y_probs_rf)
# auc_dtree_st = roc_auc_score(y_test_st, y_probs_dtree_st)
# auc_rf_st = roc_auc_score(y_test_st, y_probs_rf_st)

# # 3. Hitung ROC Curve (FPR & TPR)
# fpr_dtree, tpr_dtree, _ = roc_curve(y_test, y_probs_dtree)
# fpr_rf, tpr_rf, _ = roc_curve(y_test, y_probs_rf)
# fpr_dtree_st, tpr_dtree_st, _ = roc_curve(y_test_st, y_probs_dtree_st)
# fpr_rf_st, tpr_rf_st, _ = roc_curve(y_test_st, y_probs_rf_st)

# # 4. Membuat ROC Curve dengan Plotly
# fig = go.Figure()

# fig.add_trace(go.Scatter(
#     x=fpr_dtree, y=tpr_dtree, mode='lines',
#     name=f'Decision Tree (AUC = {auc_dtree:.4f})',
#     line=dict(color='#1f77b4', dash='dash')  # Biru Soft
# ))

# fig.add_trace(go.Scatter(
#     x=fpr_rf, y=tpr_rf, mode='lines',
#     name=f'Random Forest (AUC = {auc_rf:.4f})',
#     line=dict(color='red', dash='dot')  # Oranye Soft
# ))

# fig.add_trace(go.Scatter(
#     x=fpr_dtree_st, y=tpr_dtree_st, mode='lines',
#     name=f'Decision Tree (ST) (AUC = {auc_dtree_st:.4f})',
#     line=dict(color='#2ca02c', dash='solid')  # Hijau Soft
# ))

# fig.add_trace(go.Scatter(
#     x=fpr_rf_st, y=tpr_rf_st, mode='lines',
#     name=f'Random Forest (ST) (AUC = {auc_rf_st:.4f})',
#     line=dict(color='blue', dash='longdash')  # Merah Soft
# ))

# # Garis referensi (Random Guess)
# fig.add_trace(go.Scatter(
#     x=[0, 1], y=[0, 1], mode='lines',
#     name='Random', line=dict(color='#7f7f7f', dash='dashdot')  # Abu-abu Netral
# ))

# fig.update_layout(
#     title="ROC Curve - Decision Tree & Random Forest (Stratified & Non-Stratified)",
#     xaxis_title="False Positive Rate",
#     yaxis_title="True Positive Rate",
#     showlegend=True,
#     font=dict(size=14),  # Ukuran teks lebih nyaman
#     legend=dict(
#         bgcolor="rgba(255, 255, 255, 0.8)",  # Latar belakang legend (semi-transparan)
#         bordercolor="black",  # Warna border hitam
#         borderwidth=1  # Ketebalan border
#     ),
# )

# # Menampilkan plot
# fig.show()

In [22]:
def _command_stdout(command):
    """Return the stdout of `command`, or "" when the executable cannot be launched."""
    try:
        return subprocess.run(command, capture_output=True, text=True).stdout
    except OSError:
        # subprocess.run raises (e.g. FileNotFoundError) when the binary is missing;
        # treat that as "no information" so the report still prints with "Unknown" values.
        return ""


# CPU information: the first "model name" entry of /proc/cpuinfo
cpu_model = _command_stdout(["cat", "/proc/cpuinfo"])
cpu_name = next(
    (line.partition(":")[2].strip() for line in cpu_model.splitlines() if "model name" in line),
    "Unknown",
)

cpu_architecture = platform.machine()
cpu_cores = psutil.cpu_count(logical=False)
cpu_threads = psutil.cpu_count(logical=True)
_cpu_freq_info = psutil.cpu_freq()  # queried once; falsy when no frequency is reported
cpu_freq = _cpu_freq_info.max if _cpu_freq_info else "Unknown"

# CPU cache information, parsed from the "<name>: <value>" lines of lscpu
cache_info = _command_stdout(["lscpu"])
l1_instruction_cache, l1_data_cache, l2_cache, l3_cache = "Unknown", "Unknown", "Unknown", "Unknown"

for line in cache_info.splitlines():
    value = line.partition(":")[2].strip()
    if "L1d cache" in line:
        l1_data_cache = value
    elif "L1i cache" in line:
        l1_instruction_cache = value
    elif "L2 cache" in line:
        l2_cache = value
    elif "L3 cache" in line:
        l3_cache = value

# RAM information
ram = psutil.virtual_memory()
total_ram = round(ram.total / (1024**3), 2)  # bytes -> GB

# Disk information for the root filesystem
disk = psutil.disk_usage('/')
total_disk = round(disk.total / (1024**3), 2)

# GPU information (if available)
gpu_name = "None"
gpu_memory = "N/A"
gpu_info = "No GPU detected."
has_gpu = torch.cuda.is_available()

if has_gpu:
    gpu_name = torch.cuda.get_device_name(0)
    total_memory = torch.cuda.get_device_properties(0).total_memory
    gpu_memory = f"{round(total_memory / (1024**3), 2)} GB"

    # Ask nvidia-smi for extra detail; fall back to the device name if it gives nothing
    nvidia_info = _command_stdout(["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv"])
    gpu_info = nvidia_info if nvidia_info else gpu_name

# Output
print("=== System Information ===")
print(f"CPU Model        : {cpu_name}")
print(f"CPU Architecture : {cpu_architecture}")
print(f"CPU Cores        : {cpu_cores} (Physical), {cpu_threads} (Logical)")
print(f"CPU Max Frequency: {cpu_freq} MHz")
print(f"Total RAM        : {total_ram} GB")
print(f"Total Disk       : {total_disk} GB")
print(f"GPU Model        : {gpu_name} ({gpu_memory})")

print("\n=== CPU Cache Information ===")
print(f"L1 Instruction Cache: {l1_instruction_cache}")
print(f"L1 Data Cache      : {l1_data_cache}")
print(f"L2 Cache           : {l2_cache}")
print(f"L3 Cache           : {l3_cache}")

if has_gpu:
    print("\n=== GPU Details ===")
    print(gpu_info)
else:
    print("\nNo GPU detected.")

=== System Information ===
CPU Model        : Intel(R) Core(TM) i7-10610U CPU @ 1.80GHz
CPU Architecture : x86_64
CPU Cores        : 4 (Physical), 8 (Logical)
CPU Max Frequency: 4900.0 MHz
Total RAM        : 15.27 GB
Total Disk       : 63.0 GB
GPU Model        : None (N/A)

=== CPU Cache Information ===
L1 Instruction Cache: 128 KiB (4 instances)
L1 Data Cache      : 128 KiB (4 instances)
L2 Cache           : 1 MiB (4 instances)
L3 Cache           : 8 MiB (1 instance)

No GPU detected.


In [67]:
# Time the decision-tree fit: 100 repetitions of a single fit each
execution_times_dt = timeit.repeat(
    'dtree.fit(X_train, y_train)',
    globals=globals(),
    repeat=100,
    number=1,
)

# Fastest and slowest repetition
min_time_dt = min(execution_times_dt)
max_time_dt = max(execution_times_dt)

# Show the results
print(f"Execution Time (Min): {min_time_dt:.4f} seconds")
print(f"Execution Time (Max): {max_time_dt:.4f} seconds")

Execution Time (Min): 0.0101 seconds
Execution Time (Max): 0.0236 seconds


In [68]:
# Time the random-forest fit: 100 repetitions of a single fit each
execution_times_rf = timeit.repeat(
    'rf.fit(X_train, y_train)',
    globals=globals(),
    repeat=100,
    number=1,
)

# Fastest and slowest repetition
min_time_rf = min(execution_times_rf)
max_time_rf = max(execution_times_rf)

# Show the results
print(f"Execution Time (Min): {min_time_rf:.4f} seconds")
print(f"Execution Time (Max): {max_time_rf:.4f} seconds")

Execution Time (Min): 0.5917 seconds
Execution Time (Max): 0.8209 seconds


In [23]:
# Helper used to read memory before and after a measured operation
def process_memory():
    """Return the memory currently traced by tracemalloc, in MB (bytes / 1024**2).

    Only the first element of ``tracemalloc.get_traced_memory()`` is used;
    the second element of that tuple is ignored.
    """
    bytes_per_mb = 1024 * 1024
    current_bytes = tracemalloc.get_traced_memory()[0]
    return current_bytes / bytes_per_mb

In [24]:
def measure_memory(model, x_train, y_train):
    """Return the change in traced memory (MB) caused by one ``model.fit`` call.

    Args:
        model: object exposing ``fit(x_train, y_train)``.
        x_train: training features passed straight to ``fit``.
        y_train: training targets passed straight to ``fit``.

    Returns:
        ``process_memory()`` after the fit minus ``process_memory()`` before it.
        This is the difference in *current* traced memory, not the peak
        reached while fitting.

    Any exception raised by ``fit`` propagates, but tracing is always stopped
    first so a failed run does not leave tracemalloc enabled for later cells.
    """
    tracemalloc.start()
    try:
        initial_memory = process_memory()  # traced memory before training
        model.fit(x_train, y_train)  # train the model
        final_memory = process_memory()  # traced memory after training
    finally:
        tracemalloc.stop()
    return final_memory - initial_memory

In [137]:
# Memory delta of the decision-tree fit, measured 100 times
memory_usages_dt = [
    measure_memory(dtree, X_train, y_train)
    for _ in range(100)
]

# Smallest and largest observation
min_memory_dt = min(memory_usages_dt)
max_memory_dt = max(memory_usages_dt)

# Show the results
print(f"Memory Usage (Min): {min_memory_dt:.4f} MB")
print(f"Memory Usage (Max): {max_memory_dt:.4f} MB")

Memory Usage (Min): 0.0009 MB
Memory Usage (Max): 0.0031 MB


In [142]:
# Memory delta of the decision-tree fit on the stratified split, measured 100 times
# NOTE(review): `dtree_st` is not assigned in any visible cell — confirm where it is defined.
memory_usages_dt_st = [
    measure_memory(dtree_st, X_train_st, y_train_st)
    for _ in range(100)
]

# Smallest and largest observation
min_memory_dt_st = min(memory_usages_dt_st)
max_memory_dt_st = max(memory_usages_dt_st)

# Show the results
print(f"Memory Usage (Min): {min_memory_dt_st:.4f} MB")
print(f"Memory Usage (Max): {max_memory_dt_st:.4f} MB")

Memory Usage (Min): 0.0009 MB
Memory Usage (Max): 0.0037 MB


In [27]:
# Memory delta of the random-forest fit, measured 100 times
memory_usages_rf = [
    measure_memory(rf, X_train, y_train)
    for _ in range(100)
]

# Smallest and largest observation
min_memory_rf = min(memory_usages_rf)
max_memory_rf = max(memory_usages_rf)

# Show the results
print(f"Memory Usage (Min): {min_memory_rf:.4f} MB")
print(f"Memory Usage (Max): {max_memory_rf:.4f} MB")

Memory Usage (Min): 0.0663 MB
Memory Usage (Max): 0.0695 MB


In [28]:
# Memory delta of the random-forest fit on the stratified split, measured 100 times
# NOTE(review): `rf_st` is not assigned in any visible cell — confirm where it is defined.
memory_usages_rf_st = [
    measure_memory(rf_st, X_train_st, y_train_st)
    for _ in range(100)
]

# Smallest and largest observation
min_memory_rf_st = min(memory_usages_rf_st)
max_memory_rf_st = max(memory_usages_rf_st)

# Show the results
print(f"Memory Usage (Min): {min_memory_rf_st:.4f} MB")
print(f"Memory Usage (Max): {max_memory_rf_st:.4f} MB")

Memory Usage (Min): 0.0665 MB
Memory Usage (Max): 0.0707 MB


In [29]:
def measure_cpu_energy_usage(label, model, X_train, y_train, n_runs=100):
    """Fit ``model`` repeatedly under a pyRAPL measurement and return the energy range.

    Args:
        label: label passed to every ``pyRAPL.Measurement``.
        model: object exposing ``fit(X_train, y_train)``.
        X_train: training features passed straight to ``fit``.
        y_train: training targets passed straight to ``fit``.
        n_runs: number of measured fits; defaults to 100.

    Returns:
        ``(min_energy, max_energy)`` in joules over the ``n_runs`` fits, taken
        from the first entry of each measurement's ``pkg`` list (socket 0 only).

    Raises:
        ValueError: if ``n_runs`` is smaller than 1.

    NOTE(review): the calling cells' output shows one printed record per run,
    presumably emitted by pyRAPL when the ``with`` block exits — confirm.
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    pyRAPL.setup()

    cpu_energies = []
    for _ in range(n_runs):
        meter = pyRAPL.Measurement(label)

        with meter:
            model.fit(X_train, y_train)  # train the model inside the measured region

        # The package reading is in µJ; convert to J. Only index 0 is read.
        cpu_energies.append(meter.result.pkg[0] * 1e-6)

    return min(cpu_energies), max(cpu_energies)

In [30]:
# CPU package energy range for the decision-tree fit
min_cpu_dt, max_cpu_dt = measure_cpu_energy_usage(
    label='dtree',
    model=dtree,
    X_train=X_train,
    y_train=y_train,
)

# Show the results
print(f"CPU Energy (Min): {min_cpu_dt:.4f} J")
print(f"CPU Energy (Max): {max_cpu_dt:.4f} J")

Label : dtree
Begin : Thu Mar 13 00:07:00 2025
Duration :  3396.3950 us
-------------------------------
PKG :
	socket 0 :  85694.0000 uJ
Label : dtree
Begin : Thu Mar 13 00:07:00 2025
Duration :  2354.3970 us
-------------------------------
PKG :
	socket 0 :  38757.0000 uJ
Label : dtree
Begin : Thu Mar 13 00:07:00 2025
Duration :  2125.7820 us
-------------------------------
PKG :
	socket 0 :  34424.0000 uJ
Label : dtree
Begin : Thu Mar 13 00:07:00 2025
Duration :  2180.3900 us
-------------------------------
PKG :
	socket 0 :  41747.0000 uJ
Label : dtree
Begin : Thu Mar 13 00:07:00 2025
Duration :  2125.3490 us
-------------------------------
PKG :
	socket 0 :  16968.0000 uJ
Label : dtree
Begin : Thu Mar 13 00:07:00 2025
Duration :  2041.5230 us
-------------------------------
PKG :
	socket 0 :  28565.0000 uJ
Label : dtree
Begin : Thu Mar 13 00:07:00 2025
Duration :  2032.8950 us
-------------------------------
PKG :
	socket 0 :  42541.0000 uJ
Label : dtree
Begin : Thu Mar 13 00:07:00

In [31]:
# CPU package energy range for the decision-tree fit on the stratified split
# NOTE(review): `dtree_st` is not assigned in any visible cell — confirm where it is defined.
min_cpu_dt_st, max_cpu_dt_st = measure_cpu_energy_usage(
    label='dtree_stratified',
    model=dtree_st,
    X_train=X_train_st,
    y_train=y_train_st,
)

# Show the results
print(f"CPU Energy (Min): {min_cpu_dt_st:.4f} J")
print(f"CPU Energy (Max): {max_cpu_dt_st:.4f} J")

Label : dtree_stratified
Begin : Thu Mar 13 00:07:00 2025
Duration :  3566.0860 us
-------------------------------
PKG :
	socket 0 :  70130.0000 uJ
Label : dtree_stratified
Begin : Thu Mar 13 00:07:00 2025
Duration :  2496.3260 us
-------------------------------
PKG :
	socket 0 :  28320.0000 uJ
Label : dtree_stratified
Begin : Thu Mar 13 00:07:00 2025
Duration :  2237.6710 us
-------------------------------
PKG :
	socket 0 :  20752.0000 uJ
Label : dtree_stratified
Begin : Thu Mar 13 00:07:00 2025
Duration :  2588.0830 us
-------------------------------
PKG :
	socket 0 :  73364.0000 uJ
Label : dtree_stratified
Begin : Thu Mar 13 00:07:00 2025
Duration :  2075.5770 us
-------------------------------
PKG :
	socket 0 :  26123.0000 uJ
Label : dtree_stratified
Begin : Thu Mar 13 00:07:00 2025
Duration :  2118.4860 us
-------------------------------
PKG :
	socket 0 :  8545.0000 uJ
Label : dtree_stratified
Begin : Thu Mar 13 00:07:00 2025
Duration :  2769.5490 us
------------------------------

In [32]:
# CPU package energy range for the random-forest fit
min_cpu_rf, max_cpu_rf = measure_cpu_energy_usage(
    label='random_forest',
    model=rf,
    X_train=X_train,
    y_train=y_train,
)

# Show the results
print(f"CPU Energy (Min): {min_cpu_rf:.4f} J")
print(f"CPU Energy (Max): {max_cpu_rf:.4f} J")

Label : random_forest
Begin : Thu Mar 13 00:07:00 2025
Duration : 123651.2630 us
-------------------------------
PKG :
	socket 0 :  1828426.0000 uJ
Label : random_forest
Begin : Thu Mar 13 00:07:00 2025
Duration : 121297.7760 us
-------------------------------
PKG :
	socket 0 :  1660945.0000 uJ
Label : random_forest
Begin : Thu Mar 13 00:07:01 2025
Duration : 145299.4590 us
-------------------------------
PKG :
	socket 0 :  1830867.0000 uJ
Label : random_forest
Begin : Thu Mar 13 00:07:01 2025
Duration : 136501.7280 us
-------------------------------
PKG :
	socket 0 :  1765316.0000 uJ
Label : random_forest
Begin : Thu Mar 13 00:07:01 2025
Duration : 135655.8470 us
-------------------------------
PKG :
	socket 0 :  1899409.0000 uJ
Label : random_forest
Begin : Thu Mar 13 00:07:01 2025
Duration : 121703.8210 us
-------------------------------
PKG :
	socket 0 :  1766475.0000 uJ
Label : random_forest
Begin : Thu Mar 13 00:07:01 2025
Duration : 118860.2150 us
-------------------------------

In [33]:
# CPU package energy range for the random-forest fit on the stratified split
# NOTE(review): `rf_st` is not assigned in any visible cell — confirm where it is defined.
min_cpu_rf_st, max_cpu_rf_st = measure_cpu_energy_usage(
    label='random_forest_stratified',
    model=rf_st,
    X_train=X_train_st,
    y_train=y_train_st,
)

# Show the results
print(f"CPU Energy (Min): {min_cpu_rf_st:.4f} J")
print(f"CPU Energy (Max): {max_cpu_rf_st:.4f} J")

Label : random_forest_stratified
Begin : Thu Mar 13 00:07:13 2025
Duration : 119558.3990 us
-------------------------------
PKG :
	socket 0 :  1617977.0000 uJ
Label : random_forest_stratified
Begin : Thu Mar 13 00:07:13 2025
Duration : 116556.2540 us
-------------------------------
PKG :
	socket 0 :  1661189.0000 uJ
Label : random_forest_stratified
Begin : Thu Mar 13 00:07:13 2025
Duration : 116644.5400 us
-------------------------------
PKG :
	socket 0 :  1775875.0000 uJ
Label : random_forest_stratified
Begin : Thu Mar 13 00:07:13 2025
Duration : 117377.0350 us
-------------------------------
PKG :
	socket 0 :  1600276.0000 uJ
Label : random_forest_stratified
Begin : Thu Mar 13 00:07:13 2025
Duration : 116156.7650 us
-------------------------------
PKG :
	socket 0 :  1561702.0000 uJ
Label : random_forest_stratified
Begin : Thu Mar 13 00:07:13 2025
Duration : 115725.4030 us
-------------------------------
PKG :
	socket 0 :  1538265.0000 uJ
Label : random_forest_stratified
Begin : Thu M