In [2]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix, roc_curve, roc_auc_score
import seaborn as sns
from sklearn.calibration import cross_val_predict
from sklearn.model_selection import KFold, cross_val_score
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import plotly.express as px
import pandas as pd
import psutil
import os
import distro
import platform
import sys
import tracemalloc
import numpy as np
from numpy import mean
import torch
import platform
import subprocess
import pyRAPL
import plotly.graph_objects as go
import time

In [3]:
# Load the dataset; the path is relative to the notebook's working directory
path = 'lcs.csv'

df = pd.read_csv(path)

# Show the loaded frame (pandas truncates the middle rows when rendering)
df

Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER
0,M,69,1,2,2,1,1,2,1,2,2,2,2,2,2,YES
1,M,74,2,1,1,1,2,2,2,1,1,1,2,2,2,YES
2,F,59,1,1,1,2,1,2,1,2,1,2,2,1,2,NO
3,M,63,2,2,2,1,1,1,1,1,2,1,1,2,2,NO
4,F,63,1,2,1,1,1,1,1,2,1,2,2,1,1,NO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1152,F,74,1,1,1,1,1,1,1,1,1,2,1,1,1,NO
1153,F,75,1,1,1,1,1,1,1,1,1,2,1,1,1,NO
1154,F,76,1,1,1,1,1,1,1,1,1,2,1,1,1,NO
1155,F,77,1,1,1,1,1,1,1,1,1,2,1,1,1,NO


In [4]:
# Dataset dimensions as (rows, columns)
df.shape

(1157, 16)

In [5]:
# Column labels. NOTE: some labels (e.g. 'FATIGUE ' and 'ALLERGY ') carry trailing whitespace.
df.columns

Index(['GENDER', 'AGE', 'SMOKING', 'YELLOW_FINGERS', 'ANXIETY',
       'PEER_PRESSURE', 'CHRONIC DISEASE', 'FATIGUE ', 'ALLERGY ', 'WHEEZING',
       'ALCOHOL CONSUMING', 'COUGHING', 'SHORTNESS OF BREATH',
       'SWALLOWING DIFFICULTY', 'CHEST PAIN', 'LUNG_CANCER'],
      dtype='object')

In [6]:
# Data type of every column (object columns still need encoding before modelling)
print(df.dtypes)

GENDER                   object
AGE                       int64
SMOKING                   int64
YELLOW_FINGERS            int64
ANXIETY                   int64
PEER_PRESSURE             int64
CHRONIC DISEASE           int64
FATIGUE                   int64
ALLERGY                   int64
WHEEZING                  int64
ALCOHOL CONSUMING         int64
COUGHING                  int64
SHORTNESS OF BREATH       int64
SWALLOWING DIFFICULTY     int64
CHEST PAIN                int64
LUNG_CANCER              object
dtype: object


In [7]:
# Count the rows in each 'LUNG_CANCER' category.
# rename_axis/reset_index(name=...) yields the columns ['LUNG_CANCER', 'count']
# regardless of how the installed pandas version names value_counts() output.
tdf = df['LUNG_CANCER'].value_counts().rename_axis('LUNG_CANCER').reset_index(name='count')

# Add each category's share of the total as a percentage
total_count = tdf['count'].sum()
tdf['percentage'] = (tdf['count'] / total_count) * 100

# Bar labels of the form "<count> (<percentage>%)", aligned with tdf's rows
bar_labels = pd.Series(
    [f"{int(count)} ({percentage:.1f}%)" for count, percentage in zip(tdf['count'], tdf['percentage'])],
    index=tdf.index,
)

# Interactive bar chart (plotly.express is already imported as `px` in the imports cell)
fig = px.bar(tdf, x='LUNG_CANCER', y='count',
             title='Lung Cancer Distribution',
             labels={'LUNG_CANCER': 'Lung Cancer Category', 'count': 'Count'},
             color='LUNG_CANCER',  # one colour per category
             color_discrete_sequence=px.colors.qualitative.Set1,  # colour palette
             text=bar_labels)  # count and percentage shown on each bar

# Place the labels above the bars
fig.update_traces(textposition='outside',
                  texttemplate='%{text}')

# Leave room on every side so the outside labels are not clipped
fig.update_layout(
    margin=dict(l=50, r=50, t=50, b=50),
)

fig.show()

In [8]:
# Number of missing values per column
print(df.isnull().sum())

GENDER                   0
AGE                      0
SMOKING                  0
YELLOW_FINGERS           0
ANXIETY                  0
PEER_PRESSURE            0
CHRONIC DISEASE          0
FATIGUE                  0
ALLERGY                  0
WHEEZING                 0
ALCOHOL CONSUMING        0
COUGHING                 0
SHORTNESS OF BREATH      0
SWALLOWING DIFFICULTY    0
CHEST PAIN               0
LUNG_CANCER              0
dtype: int64


Tahap Preprocessing (Encoding tipe data)

In [9]:
# Convert the two categorical columns to integers:
#   GENDER:      F = 0, M = 1
#   LUNG_CANCER: NO = 0, YES = 1
LABEL_ENCODINGS = {
    'GENDER': {'F': 0, 'M': 1},
    'LUNG_CANCER': {'NO': 0, 'YES': 1},
}

for column, codes in LABEL_ENCODINGS.items():
    # Re-running this cell must be a no-op: mapping an already encoded column
    # again would replace every value with NaN.
    if pd.api.types.is_numeric_dtype(df[column]):
        continue

    encoded = df[column].map(codes)

    # Fail loudly on labels missing from the mapping instead of silently
    # turning them into NaN (values that were already missing are left alone).
    unexpected = df.loc[encoded.isna() & df[column].notna(), column].unique()
    if len(unexpected):
        raise ValueError(f"Unexpected values in column {column!r}: {list(unexpected)}")

    df[column] = encoded

# Check the result
print(df[['GENDER', 'LUNG_CANCER']].head())

   GENDER  LUNG_CANCER
0       1            1
1       1            1
2       0            0
3       1            0
4       0            0


In [10]:
# Confirm that every column is non-null and numeric after encoding
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1157 entries, 0 to 1156
Data columns (total 16 columns):
 #   Column                 Non-Null Count  Dtype
---  ------                 --------------  -----
 0   GENDER                 1157 non-null   int64
 1   AGE                    1157 non-null   int64
 2   SMOKING                1157 non-null   int64
 3   YELLOW_FINGERS         1157 non-null   int64
 4   ANXIETY                1157 non-null   int64
 5   PEER_PRESSURE          1157 non-null   int64
 6   CHRONIC DISEASE        1157 non-null   int64
 7   FATIGUE                1157 non-null   int64
 8   ALLERGY                1157 non-null   int64
 9   WHEEZING               1157 non-null   int64
 10  ALCOHOL CONSUMING      1157 non-null   int64
 11  COUGHING               1157 non-null   int64
 12  SHORTNESS OF BREATH    1157 non-null   int64
 13  SWALLOWING DIFFICULTY  1157 non-null   int64
 14  CHEST PAIN             1157 non-null   int64
 15  LUNG_CANCER            1157 non-null  

In [11]:
# Split the data into features and target
X = df.drop(columns=['LUNG_CANCER'])  # every column except 'LUNG_CANCER'
y = df['LUNG_CANCER']  # target column

In [12]:
# Preview the feature matrix
X.head()

Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN
0,1,69,1,2,2,1,1,2,1,2,2,2,2,2,2
1,1,74,2,1,1,1,2,2,2,1,1,1,2,2,2
2,0,59,1,1,1,2,1,2,1,2,1,2,2,1,2
3,1,63,2,2,2,1,1,1,1,1,2,1,1,2,2
4,0,63,1,2,1,1,1,1,1,2,1,2,2,1,1


In [13]:
# Preview the target vector
y.head()

0    1
1    1
2    0
3    0
4    0
Name: LUNG_CANCER, dtype: int64

In [14]:
# Data after preprocessing
df

Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER
0,1,69,1,2,2,1,1,2,1,2,2,2,2,2,2,1
1,1,74,2,1,1,1,2,2,2,1,1,1,2,2,2,1
2,0,59,1,1,1,2,1,2,1,2,1,2,2,1,2,0
3,1,63,2,2,2,1,1,1,1,1,2,1,1,2,2,0
4,0,63,1,2,1,1,1,1,1,2,1,2,2,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1152,0,74,1,1,1,1,1,1,1,1,1,2,1,1,1,0
1153,0,75,1,1,1,1,1,1,1,1,1,2,1,1,1,0
1154,0,76,1,1,1,1,1,1,1,1,1,2,1,1,1,0
1155,0,77,1,1,1,1,1,1,1,1,1,2,1,1,1,0


kFold 10 Split

In [15]:
# 42 is commonly used as the "traditional" default seed in programming and machine learning.
# It is a reference to the book "The Hitchhiker's Guide to the Galaxy", where 42 is "the answer to the ultimate question of life, the universe, and everything".
# The official documentation also says "Popular integer random seeds are 0 and 42". Link: https://scikit-learn.org/stable/glossary.html#term-random-state
# Shared 10-fold splitter; shuffling with a fixed seed keeps the fold assignment identical across runs.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

In [16]:
def evaluate_model_kfold(model, info):
    """Cross-validate `model` fold by fold, print the scores and save them as CSV.

    Uses the module-level `kf` splitter on the module-level `X` / `y`. On every
    fold the model is refit on the training part and scored on the held-out part
    with accuracy, precision, recall and F1 (binary averaging) plus ROC AUC
    (computed from the positive-class probability). The per-fold table, followed
    by a "Mean" row, is written to
    ``result/<info lower-cased, spaces replaced by underscores>_evaluation.csv``.

    Args:
        model: Classifier exposing ``fit``, ``predict`` and ``predict_proba``.
            It is refit in place, so it ends up fitted on the last fold.
        info: Human-readable model name used in the printed headers and in the
            output file name.

    Returns:
        None. Results are printed and written to disk.
    """
    percent_metrics = ("Accuracy", "Precision", "Recall", "F1-Score")
    history = {name: [] for name in percent_metrics + ("AUC",)}
    report_rows = []

    def build_row(fold_label, values):
        # Percent-style metrics get one decimal place, AUC stays on a 0-1 scale.
        row = {"Fold": fold_label}
        for name in percent_metrics:
            row[name] = f"{values[name] * 100:.1f}%"
        row["AUC"] = f"{values['AUC']:.2f}"
        return row

    print(f"==== {info} ====")

    for fold, (fit_rows, holdout_rows) in enumerate(kf.split(X), start=1):
        X_holdout, y_holdout = X.iloc[holdout_rows], y.iloc[holdout_rows]

        model.fit(X.iloc[fit_rows], y.iloc[fit_rows])
        predicted = model.predict(X_holdout)
        positive_proba = model.predict_proba(X_holdout)[:, 1]  # probability of the positive class (1)

        fold_scores = {
            "Accuracy": accuracy_score(y_holdout, predicted),
            "Precision": precision_score(y_holdout, predicted, average='binary'),
            "Recall": recall_score(y_holdout, predicted, average='binary'),
            "F1-Score": f1_score(y_holdout, predicted, average='binary'),
            "AUC": roc_auc_score(y_holdout, positive_proba),
        }
        for name, value in fold_scores.items():
            history[name].append(value)

        report_rows.append(build_row(fold, fold_scores))
        print(f"Fold {fold}: " + ", ".join(f"{name}={value:.2f}" for name, value in fold_scores.items()))

    averages = {name: np.mean(values) for name, values in history.items()}
    report_rows.append(build_row("Mean", averages))

    print(f"\n--- Mean Metrics of {info} ---")
    print(f"Mean Accuracy:  {averages['Accuracy']:.2f}")
    print(f"Mean Precision: {averages['Precision']:.2f}")
    print(f"Mean Recall:    {averages['Recall']:.2f}")
    print(f"Mean F1-Score:  {averages['F1-Score']:.2f}")
    print(f"Mean AUC:       {averages['AUC']:.2f}")

    # Save into the "result/" folder
    os.makedirs("result", exist_ok=True)
    filename = f"result/{info.lower().replace(' ', '_')}_evaluation.csv"
    pd.DataFrame(report_rows).to_csv(filename, index=False)
    print(f"\n Hasil evaluasi disimpan ke: {filename}")

In [17]:
# Decision tree with a fixed seed, scored with the cross-validation routine above
dtree = DecisionTreeClassifier(random_state=42)

evaluate_model_kfold(dtree, "Decision Tree")

==== Decision Tree ====
Fold 1: Accuracy=0.95, Precision=0.98, Recall=0.91, F1-Score=0.94, AUC=0.93
Fold 2: Accuracy=0.92, Precision=0.98, Recall=0.85, F1-Score=0.91, AUC=0.89
Fold 3: Accuracy=0.97, Precision=0.96, Recall=0.96, F1-Score=0.96, AUC=0.96
Fold 4: Accuracy=0.91, Precision=0.96, Recall=0.85, F1-Score=0.90, AUC=0.89
Fold 5: Accuracy=0.93, Precision=0.98, Recall=0.88, F1-Score=0.93, AUC=0.91
Fold 6: Accuracy=0.95, Precision=0.98, Recall=0.90, F1-Score=0.94, AUC=0.92
Fold 7: Accuracy=0.93, Precision=0.98, Recall=0.86, F1-Score=0.91, AUC=0.90
Fold 8: Accuracy=0.94, Precision=0.93, Recall=0.91, F1-Score=0.92, AUC=0.92
Fold 9: Accuracy=0.95, Precision=1.00, Recall=0.86, F1-Score=0.93, AUC=0.90
Fold 10: Accuracy=0.89, Precision=0.96, Recall=0.80, F1-Score=0.87, AUC=0.85

--- Mean Metrics of Decision Tree ---
Mean Accuracy:  0.93
Mean Precision: 0.97
Mean Recall:    0.88
Mean F1-Score:  0.92
Mean AUC:       0.91

 Hasil evaluasi disimpan ke: result/decision_tree_evaluation.csv


In [18]:
# Random forest with a fixed seed, scored with the cross-validation routine above
rf = RandomForestClassifier(random_state=42)

evaluate_model_kfold(rf, "Random Forest")

==== Random Forest ====
Fold 1: Accuracy=0.97, Precision=0.98, Recall=0.94, F1-Score=0.96, AUC=0.98
Fold 2: Accuracy=0.94, Precision=0.98, Recall=0.89, F1-Score=0.93, AUC=0.92
Fold 3: Accuracy=0.96, Precision=0.94, Recall=0.96, F1-Score=0.95, AUC=0.96
Fold 4: Accuracy=0.91, Precision=0.94, Recall=0.87, F1-Score=0.90, AUC=0.92
Fold 5: Accuracy=0.92, Precision=0.98, Recall=0.86, F1-Score=0.92, AUC=0.93
Fold 6: Accuracy=0.95, Precision=0.98, Recall=0.90, F1-Score=0.94, AUC=0.95
Fold 7: Accuracy=0.91, Precision=0.95, Recall=0.84, F1-Score=0.89, AUC=0.93
Fold 8: Accuracy=0.93, Precision=0.91, Recall=0.91, F1-Score=0.91, AUC=0.94
Fold 9: Accuracy=0.95, Precision=0.97, Recall=0.89, F1-Score=0.93, AUC=0.93
Fold 10: Accuracy=0.90, Precision=0.96, Recall=0.82, F1-Score=0.88, AUC=0.91

--- Mean Metrics of Random Forest ---
Mean Accuracy:  0.93
Mean Precision: 0.96
Mean Recall:    0.89
Mean F1-Score:  0.92
Mean AUC:       0.94

 Hasil evaluasi disimpan ke: result/random_forest_evaluation.csv


In [32]:
def measure_training_time(model):
    """Time how long `model.fit` takes on the training part of every fold.

    Iterates over the module-level `kf` splitter applied to the module-level
    `X` / `y` and refits `model` in place on each training partition.

    Args:
        model: Estimator exposing ``fit(X, y)``. It is left fitted on the last
            fold's training data when the function returns.

    Returns:
        list[float]: Elapsed fit time in seconds, one entry per fold.
    """
    training_times = []

    for train_index, _ in kf.split(X):
        X_train, y_train = X.iloc[train_index], y.iloc[train_index]

        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring short intervals; time.time() follows the wall clock and
        # can jump when the system time is adjusted.
        start_time = time.perf_counter()
        model.fit(X_train, y_train)
        training_times.append(time.perf_counter() - start_time)

    return training_times

def measure_testing_time(model):
    """Time how long `model.predict` takes on the held-out part of every fold.

    Iterates over the module-level `kf` splitter applied to the module-level
    `X`. The model is NOT refit here: it is used in whatever fitted state the
    caller passes in, so it must already be fitted.

    Args:
        model: Fitted estimator exposing ``predict(X)``.

    Returns:
        list[float]: Elapsed prediction time in seconds, one entry per fold.
    """
    testing_times = []

    for _, test_index in kf.split(X):
        X_test = X.iloc[test_index]

        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring short intervals; time.time() follows the wall clock and
        # can jump when the system time is adjusted.
        start_time = time.perf_counter()
        model.predict(X_test)
        testing_times.append(time.perf_counter() - start_time)

    return testing_times


In [62]:
# Measure the Decision Tree training time across the folds
training_times_dt = measure_training_time(dtree)

for stat_label, stat in (("Min", min), ("Max", max), ("Mean", mean)):
    print(f"Training Time ({stat_label}): {stat(training_times_dt):.4f} seconds")

Training Time (Min): 0.0120 seconds
Training Time (Max): 0.0173 seconds
Training Time (Mean): 0.0140 seconds


In [33]:
# Measure the Decision Tree testing (prediction) time across the folds
testing_times_dt = measure_testing_time(dtree)

for stat_label, stat in (("Min", min), ("Max", max), ("Mean", mean)):
    print(f"Testing Time ({stat_label}): {stat(testing_times_dt):.4f} seconds")

Testing Time (Min): 0.0046 seconds
Testing Time (Max): 0.0090 seconds
Testing Time (Mean): 0.0066 seconds


In [34]:
# Measure the Random Forest training time across the folds
training_times_rf = measure_training_time(rf)

for stat_label, stat in (("Min", min), ("Max", max), ("Mean", mean)):
    print(f"Training Time ({stat_label}): {stat(training_times_rf):.4f} seconds")

Training Time (Min): 0.6681 seconds
Training Time (Max): 0.8610 seconds
Training Time (Mean): 0.7139 seconds


In [35]:
# Measure the Random Forest testing (prediction) time across the folds
testing_times_rf = measure_testing_time(rf)

for stat_label, stat in (("Min", min), ("Max", max), ("Mean", mean)):
    print(f"Testing Time ({stat_label}): {stat(testing_times_rf):.4f} seconds")

Testing Time (Min): 0.0349 seconds
Testing Time (Max): 0.0446 seconds
Testing Time (Mean): 0.0386 seconds


In [46]:
def process_memory():
    """Return tracemalloc's (current, peak) traced memory converted from bytes to MB.

    Returns:
        tuple[float, float]: Current and peak traced size, each divided by
        1024 * 1024.
    """
    bytes_per_mb = 1024 * 1024
    return tuple(size / bytes_per_mb for size in tracemalloc.get_traced_memory())

def measure_memory_usage_training(model):
    """Record traced memory while fitting `model` on every fold's training part.

    Iterates over the module-level `kf` splitter applied to the module-level
    `X` / `y`. Tracing is started immediately before ``fit`` and read through
    `process_memory()` right after it, so each reading covers one fit only.

    Args:
        model: Estimator exposing ``fit(X, y)``; it is refit in place.

    Returns:
        tuple[list[float], list[float]]: Per-fold current and peak traced
        memory in MB, in that order.
    """
    currents = []
    peaks = []

    for train_index, _ in kf.split(X):
        X_train = X.iloc[train_index]
        y_train = y.iloc[train_index]

        tracemalloc.start()
        try:
            model.fit(X_train, y_train)
            current_memory, peak_memory = process_memory()
        finally:
            # Stop tracing even when fit() raises; otherwise tracing stays
            # enabled and keeps slowing down every later cell in the kernel.
            tracemalloc.stop()

        currents.append(current_memory)
        peaks.append(peak_memory)

    return currents, peaks

def measure_memory_usage_testing(model):
    """Record traced memory while predicting on every fold's held-out part.

    Iterates over the module-level `kf` splitter applied to the module-level
    `X`. Only ``predict`` is traced; the model is not refit, so it must
    already be fitted when it is passed in.

    Args:
        model: Fitted estimator exposing ``predict(X)``.

    Returns:
        tuple[list[float], list[float]]: Per-fold current and peak traced
        memory in MB, in that order.
    """
    currents_test = []
    peaks_test = []

    for _, test_index in kf.split(X):
        X_test = X.iloc[test_index]

        # Trace RAM during testing (inference) only
        tracemalloc.start()
        try:
            model.predict(X_test)
            current_memory_test, peak_memory_test = process_memory()
        finally:
            # Stop tracing even when predict() raises; otherwise tracing stays
            # enabled and keeps slowing down every later cell in the kernel.
            tracemalloc.stop()

        currents_test.append(current_memory_test)
        peaks_test.append(peak_memory_test)

    return currents_test, peaks_test

In [44]:
# Memory usage while training the Decision Tree
currents_dt_training, peaks_dt_training = measure_memory_usage_training(dtree)

# The printed label previously read "Traning"; the spelling is corrected here.
for position, (kind, values) in enumerate((("Current", currents_dt_training), ("Peak", peaks_dt_training))):
    if position:
        print()  # blank line between the "Current" and "Peak" groups
    for stat_label, stat in (("Min", min), ("Max", max), ("Mean", mean)):
        print(f"{kind} Training Memory Usage ({stat_label}): {stat(values):.4f} MB")

Current Traning Memory Usage (Min): 0.0026 MB
Current Traning Memory Usage (Max): 0.0035 MB
Current Traning Memory Usage (Mean): 0.0027 MB

Peak Traning Memory Usage (Min): 0.1859 MB
Peak Traning Memory Usage (Max): 0.1867 MB
Peak Traning Memory Usage (Mean): 0.1860 MB


In [45]:
# Memory usage while testing (predicting with) the Decision Tree
currents_dt_testing, peaks_dt_testing = measure_memory_usage_testing(dtree)

for position, (kind, values) in enumerate((("Current", currents_dt_testing), ("Peak", peaks_dt_testing))):
    if position:
        print()  # blank line between the "Current" and "Peak" groups
    for stat_label, stat in (("Min", min), ("Max", max), ("Mean", mean)):
        print(f"{kind} Testing Memory Usage ({stat_label}): {stat(values):.4f} MB")

Current Testing Memory Usage (Min): 0.0010 MB
Current Testing Memory Usage (Max): 0.0018 MB
Current Testing Memory Usage (Mean): 0.0012 MB

Peak Testing Memory Usage (Min): 0.0227 MB
Peak Testing Memory Usage (Max): 0.0237 MB
Peak Testing Memory Usage (Mean): 0.0230 MB


In [47]:
# Memory usage while training the Random Forest
currents_rf_training, peaks_rf_training = measure_memory_usage_training(rf)

# The printed label previously read "Traning"; the spelling is corrected here.
for position, (kind, values) in enumerate((("Current", currents_rf_training), ("Peak", peaks_rf_training))):
    if position:
        print()  # blank line between the "Current" and "Peak" groups
    for stat_label, stat in (("Min", min), ("Max", max), ("Mean", mean)):
        print(f"{kind} Training Memory Usage ({stat_label}): {stat(values):.4f} MB")

Current Traning Memory Usage (Min): 0.0678 MB
Current Traning Memory Usage (Max): 0.0736 MB
Current Traning Memory Usage (Mean): 0.0699 MB

Peak Traning Memory Usage (Min): 0.2248 MB
Peak Traning Memory Usage (Max): 0.2306 MB
Peak Traning Memory Usage (Mean): 0.2270 MB


In [48]:
# Memory usage while testing (predicting with) the Random Forest
currents_rf_testing, peaks_rf_testing = measure_memory_usage_testing(rf)

for position, (kind, values) in enumerate((("Current", currents_rf_testing), ("Peak", peaks_rf_testing))):
    if position:
        print()  # blank line between the "Current" and "Peak" groups
    for stat_label, stat in (("Min", min), ("Max", max), ("Mean", mean)):
        print(f"{kind} Testing Memory Usage ({stat_label}): {stat(values):.4f} MB")

Current Testing Memory Usage (Min): 0.0083 MB
Current Testing Memory Usage (Max): 0.0113 MB
Current Testing Memory Usage (Mean): 0.0087 MB

Peak Testing Memory Usage (Min): 0.0243 MB
Peak Testing Memory Usage (Max): 0.0274 MB
Peak Testing Memory Usage (Mean): 0.0248 MB


In [19]:
# Run `sudo chmod -R a+r /sys/class/powercap/intel-rapl` in a terminal, then restart the kernel (optional),
# to resolve: PermissionError: [Errno 13] Permission denied: '/sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj'
def measure_energy_usage_training(label, model, showLog=False):
    """Measure the energy pyRAPL reports while fitting `model` on each fold.

    Iterates over the module-level `kf` splitter applied to the module-level
    `X` / `y`. A fresh ``pyRAPL.Measurement`` wraps every ``fit`` call; only the
    first entry of the result's ``pkg`` (and ``dram``, when present) list is
    read and converted from microjoules to joules.

    Args:
        label: Name passed to each ``pyRAPL.Measurement``.
        model: Estimator exposing ``fit(X, y)``; it is refit in place.
        showLog: When true, print the per-fold readings.

    Returns:
        tuple[list[float], list[float]]: Per-fold CPU and DRAM energy in
        joules. The DRAM entry is 0.0 on folds without a DRAM reading.
    """
    pyRAPL.setup()
    cpu_joules, dram_joules = [], []

    fold = 0
    for train_index, _ in kf.split(X):
        fold += 1
        X_train, y_train = X.iloc[train_index], y.iloc[train_index]

        meter = pyRAPL.Measurement(label)
        meter.begin()
        model.fit(X_train, y_train)
        meter.end()
        reading = meter.result

        # Microjoules -> joules for the CPU package
        cpu_energy_joule = reading.pkg[0] * 1e-6
        # DRAM may be unsupported on the machine; fall back to 0.0 in that case
        dram_energy_joule = reading.dram[0] * 1e-6 if reading.dram else 0.0

        cpu_joules.append(cpu_energy_joule)
        dram_joules.append(dram_energy_joule)

        if showLog:
            print(f"Fold {fold}: CPU Energy = {cpu_energy_joule:.4f} J, DRAM Energy = {dram_energy_joule:.4f} J")

    return cpu_joules, dram_joules

def measure_energy_usage_testing(label, model, showLog=False):
    """Measure the energy pyRAPL reports while predicting on each fold.

    Iterates over the module-level `kf` splitter applied to the module-level
    `X`. A fresh ``pyRAPL.Measurement`` wraps every ``predict`` call; only the
    first entry of the result's ``pkg`` (and ``dram``, when present) list is
    read and converted from microjoules to joules. The model is not refit, so
    it must already be fitted.

    Args:
        label: Name passed to each ``pyRAPL.Measurement``.
        model: Fitted estimator exposing ``predict(X)``.
        showLog: When true, print the per-fold readings.

    Returns:
        tuple[list[float], list[float]]: Per-fold CPU and DRAM energy in
        joules. The DRAM entry is 0.0 on folds without a DRAM reading.
    """
    pyRAPL.setup()
    cpu_joules, dram_joules = [], []

    fold = 0
    for _, test_index in kf.split(X):
        fold += 1
        X_test = X.iloc[test_index]

        meter = pyRAPL.Measurement(label)
        meter.begin()
        model.predict(X_test)
        meter.end()
        reading = meter.result

        # Microjoules -> joules for the CPU package
        cpu_energy_joule = reading.pkg[0] * 1e-6
        # DRAM may be unsupported on the machine; fall back to 0.0 in that case
        dram_energy_joule = reading.dram[0] * 1e-6 if reading.dram else 0.0

        cpu_joules.append(cpu_energy_joule)
        dram_joules.append(dram_energy_joule)

        if showLog:
            print(f"Fold {fold}: CPU Energy (Test) = {cpu_energy_joule:.6f} J, DRAM Energy = {dram_energy_joule:.6f} J")

    return cpu_joules, dram_joules


In [21]:
# Energy used while training the Decision Tree (per-fold log enabled)
cpu_dt_training, dram_dt_training = measure_energy_usage_training('dtree training', dtree, True)

for component, readings in (("CPU", cpu_dt_training), ("DRAM", dram_dt_training)):
    print()  # blank line before each summary group
    for stat_label, stat in (("Min", min), ("Max", max), ("Mean", mean)):
        print(f"{component} Energy Training ({stat_label}): {stat(readings):.4f} J")

Fold 1: CPU Energy = 0.0958 J, DRAM Energy = 0.0328 J
Fold 2: CPU Energy = 0.0941 J, DRAM Energy = 0.0190 J
Fold 3: CPU Energy = 0.0742 J, DRAM Energy = 0.0156 J
Fold 4: CPU Energy = 0.0612 J, DRAM Energy = 0.0092 J
Fold 5: CPU Energy = 0.0511 J, DRAM Energy = 0.0114 J
Fold 6: CPU Energy = 0.0790 J, DRAM Energy = 0.0272 J
Fold 7: CPU Energy = 0.0458 J, DRAM Energy = 0.0134 J
Fold 8: CPU Energy = 0.0470 J, DRAM Energy = 0.0194 J
Fold 9: CPU Energy = 0.0433 J, DRAM Energy = 0.0174 J
Fold 10: CPU Energy = 0.0355 J, DRAM Energy = 0.0117 J

CPU Energy Training (Min): 0.0355 J
CPU Energy Training (Max): 0.0958 J
CPU Energy Training (Mean): 0.0627 J

DRAM Energy Training (Min): 0.0092 J
DRAM Energy Training (Max): 0.0328 J
DRAM Energy Training (Mean): 0.0177 J


In [59]:
# Energy used while testing (predicting with) the Decision Tree (per-fold log enabled)
cpu_dt_testing, dram_dt_testing = measure_energy_usage_testing('dtree testing', dtree, True)

for component, readings in (("CPU", cpu_dt_testing), ("DRAM", dram_dt_testing)):
    print()  # blank line before each summary group
    for stat_label, stat in (("Min", min), ("Max", max), ("Mean", mean)):
        print(f"{component} Energy Testing ({stat_label}): {stat(readings):.4f} J")

Fold 1: CPU Energy (Test) = 0.023986 J, DRAM Energy = 0.011413 J
Fold 2: CPU Energy (Test) = 0.026184 J, DRAM Energy = 0.009460 J
Fold 3: CPU Energy (Test) = 0.029235 J, DRAM Energy = 0.008118 J
Fold 4: CPU Energy (Test) = 0.042358 J, DRAM Energy = 0.016968 J
Fold 5: CPU Energy (Test) = 0.026916 J, DRAM Energy = 0.008850 J
Fold 6: CPU Energy (Test) = 0.059998 J, DRAM Energy = 0.020386 J
Fold 7: CPU Energy (Test) = 0.049682 J, DRAM Energy = 0.011230 J
Fold 8: CPU Energy (Test) = 0.041138 J, DRAM Energy = 0.013916 J
Fold 9: CPU Energy (Test) = 0.031555 J, DRAM Energy = 0.009094 J
Fold 10: CPU Energy (Test) = 0.037720 J, DRAM Energy = 0.011414 J

CPU Energy Testing (Min): 0.0240 J
CPU Energy Testing (Max): 0.0600 J
CPU Energy Testing (Mean): 0.0369 J

DRAM Energy Testing (Min): 0.0081 J
DRAM Energy Testing (Max): 0.0204 J
DRAM Energy Testing (Mean): 0.0121 J


In [60]:
# Energy used while training the Random Forest (per-fold log enabled)
cpu_rf_training, dram_rf_training = measure_energy_usage_training('rf training', rf, True)

for component, readings in (("CPU", cpu_rf_training), ("DRAM", dram_rf_training)):
    print()  # blank line before each summary group
    for stat_label, stat in (("Min", min), ("Max", max), ("Mean", mean)):
        print(f"{component} Energy Training ({stat_label}): {stat(readings):.4f} J")

Fold 1: CPU Energy = 3.1741 J, DRAM Energy = 1.3240 J
Fold 2: CPU Energy = 2.6913 J, DRAM Energy = 1.1557 J
Fold 3: CPU Energy = 2.9137 J, DRAM Energy = 1.0990 J
Fold 4: CPU Energy = 2.5187 J, DRAM Energy = 1.0668 J
Fold 5: CPU Energy = 2.5487 J, DRAM Energy = 1.0771 J
Fold 6: CPU Energy = 3.0589 J, DRAM Energy = 1.1192 J
Fold 7: CPU Energy = 2.5644 J, DRAM Energy = 1.0645 J
Fold 8: CPU Energy = 2.7835 J, DRAM Energy = 1.2015 J
Fold 9: CPU Energy = 3.7310 J, DRAM Energy = 1.5281 J
Fold 10: CPU Energy = 2.8249 J, DRAM Energy = 1.2181 J

CPU Energy Training (Min): 2.5187 J
CPU Energy Training (Max): 3.7310 J
CPU Energy Training (Mean): 2.8809 J

DRAM Energy Training (Min): 1.0645 J
DRAM Energy Training (Max): 1.5281 J
DRAM Energy Training (Mean): 1.1854 J


In [61]:
# Energy used while testing (predicting with) the Random Forest (per-fold log enabled)
cpu_rf_testing, dram_rf_testing = measure_energy_usage_testing('rf testing', rf, True)

for component, readings in (("CPU", cpu_rf_testing), ("DRAM", dram_rf_testing)):
    print()  # blank line before each summary group
    for stat_label, stat in (("Min", min), ("Max", max), ("Mean", mean)):
        print(f"{component} Energy Testing ({stat_label}): {stat(readings):.4f} J")

Fold 1: CPU Energy (Test) = 0.182617 J, DRAM Energy = 0.041381 J
Fold 2: CPU Energy (Test) = 0.244689 J, DRAM Energy = 0.065368 J
Fold 3: CPU Energy (Test) = 0.188964 J, DRAM Energy = 0.069214 J
Fold 4: CPU Energy (Test) = 0.162781 J, DRAM Energy = 0.073303 J
Fold 5: CPU Energy (Test) = 0.143432 J, DRAM Energy = 0.065674 J
Fold 6: CPU Energy (Test) = 0.144043 J, DRAM Energy = 0.059448 J
Fold 7: CPU Energy (Test) = 0.179809 J, DRAM Energy = 0.083191 J
Fold 8: CPU Energy (Test) = 0.181335 J, DRAM Energy = 0.080383 J
Fold 9: CPU Energy (Test) = 0.155212 J, DRAM Energy = 0.068359 J
Fold 10: CPU Energy (Test) = 0.138549 J, DRAM Energy = 0.059509 J

CPU Energy Testing (Min): 0.1385 J
CPU Energy Testing (Max): 0.2447 J
CPU Energy Testing (Mean): 0.1721 J

DRAM Energy Testing (Min): 0.0414 J
DRAM Energy Testing (Max): 0.0832 J
DRAM Energy Testing (Mean): 0.0666 J


In [82]:
# Information about the machine used to run the experiments
# OS
print("=== Sistem Operasi (OS) Info ===")
# platform.system() names the actual OS; os.name == "posix" is also true on
# macOS/BSD. Single quotes inside the f-string keep the line valid on
# Python versions older than 3.12.
print(f"OS Name         : {platform.system() or 'Unknown'}")
print(f"Distro Name     : {distro.name()}")
print(f"Version         : {distro.version()}")
print(f"Details         : {distro.lsb_release_info()}")
print(f"Kernel Version  : {platform.release()}")
print(f"Python Path     : {sys.executable}")
print(f"Python Version  : {platform.python_version()}")
print()

# CPU information: read /proc/cpuinfo directly; it only exists on Linux
try:
    with open("/proc/cpuinfo", encoding="utf-8", errors="replace") as cpuinfo_file:
        cpu_model = cpuinfo_file.read()
except OSError:
    cpu_model = ""

# First "model name" entry, or "Unknown" when the file is missing or has none
cpu_name = next(
    (line.split(":", 1)[1].strip()
     for line in cpu_model.splitlines()
     if "model name" in line and ":" in line),
    "Unknown",
)

cpu_architecture = platform.machine()
cpu_cores = psutil.cpu_count(logical=False)
cpu_threads = psutil.cpu_count(logical=True)
# Query the frequency once; a falsy result means it is unavailable
cpu_freq_info = psutil.cpu_freq()
cpu_freq = cpu_freq_info.max if cpu_freq_info else "Unknown"

# CPU cache information from lscpu; an OSError (e.g. the tool is not installed)
# leaves every entry as "Unknown"
try:
    cache_info = subprocess.run(["lscpu"], capture_output=True, text=True).stdout
except OSError:
    cache_info = ""
l1_instruction_cache, l1_data_cache, l2_cache, l3_cache = "Unknown", "Unknown", "Unknown", "Unknown"

for line in cache_info.splitlines():
    field, separator, value = line.partition(":")
    if not separator:
        continue
    value = value.strip()
    if "L1d cache" in field:
        l1_data_cache = value
    elif "L1i cache" in field:
        l1_instruction_cache = value
    elif "L2 cache" in field:
        l2_cache = value
    elif "L3 cache" in field:
        l3_cache = value

# RAM information
ram = psutil.virtual_memory()
total_ram = round(ram.total / (1024**3), 2)  # bytes -> GB

# Disk information
disk = psutil.disk_usage('/')
total_disk = round(disk.total / (1024**3), 2)

# GPU information (if available)
gpu_name = "None"
gpu_memory = "N/A"
gpu_info = "No GPU detected."

has_gpu = torch.cuda.is_available()
if has_gpu:
    gpu_name = torch.cuda.get_device_name(0)
    total_memory = torch.cuda.get_device_properties(0).total_memory
    gpu_memory = f"{round(total_memory / (1024**3), 2)} GB"

    # Use nvidia-smi for extra detail; fall back to the device name when it
    # cannot be run or returns no output
    try:
        nvidia_info = subprocess.run(["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv"],
                                     capture_output=True, text=True).stdout
    except OSError:
        nvidia_info = ""
    gpu_info = nvidia_info if nvidia_info else gpu_name

# Output
print("=== System Information ===")
print(f"CPU Model        : {cpu_name}")
print(f"CPU Architecture : {cpu_architecture}")
print(f"CPU Cores        : {cpu_cores} (Physical), {cpu_threads} (Logical)")
print(f"CPU Max Frequency: {cpu_freq} MHz")
print(f"Total RAM        : {total_ram} GB")
print(f"Total Disk       : {total_disk} GB")
print(f"GPU Model        : {gpu_name} ({gpu_memory})")

print("\n=== CPU Cache Information ===")
print(f"L1 Instruction Cache: {l1_instruction_cache}")
print(f"L1 Data Cache      : {l1_data_cache}")
print(f"L2 Cache           : {l2_cache}")
print(f"L3 Cache           : {l3_cache}")

if has_gpu:
    print("\n=== GPU Details ===")
    print(gpu_info)
else:
    print("\nNo GPU detected.")

=== Sistem Operasi (OS) Info ===
OS Name         : Linux
Distro Name     : Ubuntu
Version         : 24.04
Details         : {'distributor_id': 'Ubuntu', 'description': 'Ubuntu 24.04.2 LTS', 'release': '24.04', 'codename': 'noble'}
Kernel Version  : 6.11.0-17-generic
Python Path     : /home/yashlan/Documents/GitHub/lung-cancer-prediction/myvenv/bin/python
Python Version  : 3.12.3

=== System Information ===
CPU Model        : Intel(R) Core(TM) i7-10610U CPU @ 1.80GHz
CPU Architecture : x86_64
CPU Cores        : 4 (Physical), 8 (Logical)
CPU Max Frequency: 4900.0 MHz
Total RAM        : 15.27 GB
Total Disk       : 63.0 GB
GPU Model        : None (N/A)

=== CPU Cache Information ===
L1 Instruction Cache: 128 KiB (4 instances)
L1 Data Cache      : 128 KiB (4 instances)
L2 Cache           : 1 MiB (4 instances)
L3 Cache           : 8 MiB (1 instance)

No GPU detected.
