<a href="https://colab.research.google.com/github/viktorngkhnh/BearingData/blob/main/CNN_2D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
import os
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Mount Google Drive (via google.colab's `drive` helper) so the dataset
# folder below is reachable from this runtime.
drive.mount('/content/drive')

# Root folder of the dataset used by the plotting and training cells.
DATA_PATH = '/content/drive/My Drive/Bearing/DE_data'

if os.path.exists(DATA_PATH):
    print(f"{DATA_PATH}")
    print("Fld:", os.listdir(DATA_PATH))
else:
    # Report a wrong path here, like the FE path check does, instead of
    # letting a later cell fail on it.
    print(f"Data path not found: {DATA_PATH}")

In [None]:
# --- CELL 2: VISUALIZE RAW DATA (inspect the raw signals) ---

def plot_raw_sample(root_dir):
    """Plot the start of one raw ``DE_time`` signal for each bearing class.

    For every class name in the internal list, the first entry of
    ``root_dir`` whose name contains that class name is used, the first
    ``.mat`` file listed in it is loaded, and the first 12000 samples of the
    variable whose key contains ``'DE_time'`` are drawn in a separate figure.

    Args:
        root_dir: Directory that holds one sub-folder per bearing class.
            If it does not exist, nothing is plotted.

    Returns:
        None. Figures are displayed with ``plt.show()`` and a summary of how
        many classes were plotted is printed.
    """
    target_folders = ["Normal", "Ball_007", "Ball_014", "Ball_021", "Ball_028"]
    # The summary compares against the real number of classes rather than a
    # literal, so it stays correct when the class list changes.
    expected = len(target_folders)
    found_count = 0

    if os.path.exists(root_dir):
        all_folders = os.listdir(root_dir)

        for target in target_folders:
            folder_name = next((f for f in all_folders if target in f), None)
            if folder_name is None:
                print(f"‚ö†Ô∏è Kh√¥ng th·∫•y folder n√†o ch·ª©a t√™n '{target}'")
                continue

            folder_path = os.path.join(root_dir, folder_name)
            files = [f for f in os.listdir(folder_path) if f.endswith('.mat')]
            if not files:
                continue

            file_path = os.path.join(folder_path, files[0])
            try:
                # Only loading and key lookup are expected to fail (unreadable
                # file, or no 'DE_time' variable); report and move on.
                mat = scipy.io.loadmat(file_path)
                key = [k for k in mat.keys() if 'DE_time' in k][0]
                sig = mat[key].flatten()
            except Exception as e:
                print(f"L·ªói ƒë·ªçc file {files[0]}: {e}")
                continue

            # One figure per class, all on the same fixed amplitude range.
            plt.figure(figsize=(15, 5))
            plt.ylim(-2, 2)
            plt.plot(sig[:12000])
            plt.title(f"{target} (File: {files[0]})")
            plt.ylabel("Amplitude")
            plt.grid(True)
            found_count += 1

    if found_count == expected:
        print(f"‚úÖ ƒê√£ v·∫Ω ƒë·ªß {expected} lo·∫°i t√≠n hi·ªáu. H√£y quan s√°t h√¨nh b√™n d∆∞·ªõi!")
    else:
        print(f"‚ö†Ô∏è Ch·ªâ t√¨m th·∫•y {found_count}/{expected} lo·∫°i d·ªØ li·ªáu.")

    # Nothing to display when no figure was created.
    if found_count:
        plt.show()


# Plot one raw sample per class from the dataset folder configured in DATA_PATH.
plot_raw_sample(DATA_PATH)

In [None]:
def transform_1d_to_2d(signal, img_size=32):
    """Cut a 1D signal into consecutive square grayscale images.

    The signal is split into non-overlapping windows of
    ``img_size * img_size`` samples and each window is reshaped row by row
    into an ``img_size x img_size`` matrix. Trailing samples that do not
    fill a whole window are dropped.
    Example: with ``img_size=32`` every 1024 samples become one (32, 32) image.

    Args:
        signal: Array-like of samples; it is flattened before windowing.
        img_size: Side length of each square image. Must be positive.

    Returns:
        numpy.ndarray of shape ``(num_images, 1, img_size, img_size)``; the
        size-1 axis is the channel axis for a grayscale CNN input.
        ``num_images`` is 0 when the signal is shorter than one window.

    Raises:
        ValueError: If ``img_size`` is not positive.
    """
    if img_size <= 0:
        raise ValueError("img_size must be positive")

    samples = np.asarray(signal).reshape(-1)

    # One image consumes exactly img_size * img_size samples.
    window_size = img_size * img_size

    # Number of whole images available; the remainder is discarded.
    num_images = samples.size // window_size
    truncated_signal = samples[:num_images * window_size]

    # Explicit leading dimension keeps the reshape well-defined for 0 images.
    images_2d = truncated_signal.reshape(num_images, img_size, img_size)

    # Insert the channel axis: (N, img_size, img_size) -> (N, 1, img_size, img_size).
    return np.expand_dims(images_2d, axis=1)


In [None]:
from scipy import signal

def plot_spectrograms(root_dir, fs=12000):
    """Draw a 2x2 grid of spectrograms, one per bearing class.

    For each class name, the first entry of ``root_dir`` whose name contains
    it is used, the first ``.mat`` file listed in it is loaded, and the first
    120000 samples of the variable whose key contains ``'DE_time'`` are
    turned into a spectrogram (``nperseg=512``) shown in decibels.

    Args:
        root_dir: Directory that holds one sub-folder per bearing class.
        fs: Sampling frequency in Hz passed to ``signal.spectrogram``. The
            original author's note says CWRU data is usually 12000 or
            48000 Hz.

    Returns:
        None. The figure is displayed with ``plt.show()``. If ``root_dir`` is
        not a directory a message is printed and nothing is drawn.
    """
    target_folders = ["Normal", "Ball_007", "Ball_014", "Ball_021"]

    # Same tolerance for a missing data folder as plot_raw_sample.
    if not os.path.isdir(root_dir):
        print(f"Data directory not found: {root_dir}")
        return

    plt.figure(figsize=(18, 12))
    all_folders = os.listdir(root_dir)

    for i, target in enumerate(target_folders):
        folder_name = next((f for f in all_folders if target in f), None)
        if folder_name is None:
            continue

        folder_path = os.path.join(root_dir, folder_name)
        files = [f for f in os.listdir(folder_path) if f.endswith('.mat')]
        if not files:
            continue

        mat = scipy.io.loadmat(os.path.join(folder_path, files[0]))
        keys = [k for k in mat.keys() if 'DE_time' in k]
        if not keys:
            print(f"No 'DE_time' variable in {files[0]}; skipping {target}")
            continue

        # Only the leading part of the recording is drawn.
        sig = mat[keys[0]].flatten()[:120000]

        # Key step: build the spectrogram.
        frequencies, times, Sxx = signal.spectrogram(sig, fs=fs, nperseg=512)

        # Small offset avoids log10(0) on empty bins, matching the other
        # spectrogram code in this file.
        Sxx_db = 10 * np.log10(Sxx + 1e-10)

        plt.subplot(2, 2, i + 1)
        # pcolormesh renders the 2D matrix on the time/frequency axes.
        plt.pcolormesh(times, frequencies, Sxx_db, shading='gouraud', cmap='jet')
        plt.title(f"Spectrogram: {target}")
        plt.ylabel("Frequency [Hz]")
        plt.xlabel("Time [sec]")
        plt.colorbar(label="Intensity [dB]")

    plt.tight_layout()
    plt.show()

# Call the function to view the result
plot_spectrograms(DATA_PATH)

In [None]:
import cv2 # Th∆∞ vi·ªán x·ª≠ l√Ω ·∫£nh

def get_spectrogram_image(sig, fs=12000, img_size=64):
    """Turn a 1D signal into a square 8-bit grayscale spectrogram image.

    Args:
        sig: 1D sequence of samples passed to ``signal.spectrogram``.
        fs: Sampling frequency in Hz.
        img_size: Width and height of the returned image.

    Returns:
        ``uint8`` array resized by OpenCV to ``img_size x img_size``.
    """
    # Raw spectrogram; the frequency and time axes are not needed here.
    _, _, power = signal.spectrogram(sig, fs=fs, nperseg=256)

    # Log (dB) scale brings out the features; the offset avoids log10(0).
    power_db = 10 * np.log10(power + 1e-10)

    # Min-max stretch to [0, 255], then cast to an 8-bit grayscale image.
    gray = cv2.normalize(power_db, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

    # Resample to the fixed output size (e.g. 64x64).
    return cv2.resize(gray, (img_size, img_size))

# --- TRY IT OUT ---
# img = get_spectrogram_image(signal)
# plt.imshow(img, cmap='jet')
# print(f"K√≠ch th∆∞·ªõc ·∫£nh m·ªõi: {img.shape}")

In [None]:
class BearingSpectrogramDataset(Dataset):
    """Spectrogram images cut from bearing vibration ``.mat`` recordings.

    Every sub-folder of ``root_dir`` whose name contains one of the known
    class names is scanned for ``.mat`` files. Each recording is cut into
    overlapping windows, and each window becomes one ``img_size x img_size``
    8-bit spectrogram image labelled with the index of its class.

    After construction ``samples`` is an array of images and ``labels`` an
    array of integer class indices, both built with ``np.array``.
    """

    def __init__(self, root_dir, window_size=2048, stride=512, img_size=64,
                 fs=12000, max_per_file=1000):
        """Scan ``root_dir`` and build all spectrogram images in memory.

        Args:
            root_dir: Directory with one sub-folder per bearing class.
            window_size: Number of samples per window.
            stride: Step between window starts. A stride smaller than the
                window gives overlapping windows; with window=2048 and
                stride=512 this yields about four times as many images as
                non-overlapping windows.
            img_size: Side length of the generated images.
            fs: Sampling frequency in Hz passed to ``signal.spectrogram``.
            max_per_file: Maximum number of windows taken from one file, to
                limit RAM usage. ``None`` means no limit.

        Raises:
            ValueError: If ``window_size`` or ``stride`` is not positive.
        """
        if window_size <= 0 or stride <= 0:
            raise ValueError("window_size and stride must be positive")

        self.samples = []
        self.labels = []
        self.img_size = img_size

        target_folders = ["Normal", "Ball_007", "Ball_014", "Ball_021", "Ball_028"]
        label_map = {name: i for i, name in enumerate(target_folders)}

        for folder in os.listdir(root_dir):
            match = next((t for t in target_folders if t in folder), None)
            folder_path = os.path.join(root_dir, folder)
            # Ignore unrelated entries and plain files that merely contain a
            # class name.
            if match is None or not os.path.isdir(folder_path):
                continue
            label = label_map[match]

            for file in os.listdir(folder_path):
                if not file.endswith('.mat'):
                    continue

                mat = scipy.io.loadmat(os.path.join(folder_path, file))
                key = self._find_signal_key(mat)
                if key is None:
                    # A file without a usable variable should not abort the
                    # whole dataset build.
                    print(f"Skipping {file}: no DE/FE time-series variable found")
                    continue
                signal_full = mat[key].flatten()

                # Window starts: every `stride` samples while a full window
                # still fits, capped at max_per_file windows.
                last_start = len(signal_full) - window_size
                starts = range(0, last_start + 1, stride)[:max_per_file]
                for start in starts:
                    segment = signal_full[start:start + window_size]
                    self.samples.append(
                        self._segment_to_image(segment, fs, img_size)
                    )
                    self.labels.append(label)

        self.samples = np.array(self.samples)
        self.labels = np.array(self.labels)

    @staticmethod
    def _find_signal_key(mat):
        """Return the first key containing 'time' and 'DE' or 'FE', or None."""
        # NOTE(review): if a file holds both a DE and an FE variable, whichever
        # comes first in the loaded dict is used - confirm this is intended
        # when reading the FE data folder.
        return next(
            (k for k in mat if 'time' in k and ('DE' in k or 'FE' in k)),
            None,
        )

    @staticmethod
    def _segment_to_image(segment, fs, img_size):
        """Convert one signal window into a uint8 spectrogram image."""
        _, _, Sxx = signal.spectrogram(segment, fs=fs)
        # dB scale; the offset avoids log10(0).
        Sxx_log = 10 * np.log10(Sxx + 1e-10)
        Sxx_norm = cv2.normalize(Sxx_log, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
        return cv2.resize(Sxx_norm, (img_size, img_size))

    def __len__(self):
        """Return the number of spectrogram images."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is a float32 tensor with a leading channel axis, scaled by
        1/255; the label is a ``torch.long`` tensor.
        """
        img = torch.tensor(self.samples[idx], dtype=torch.float32).unsqueeze(0) / 255.0
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return img, label

# Re-initialize with stride=512 (75% overlap between consecutive 2048-sample windows)
dataset_overlap = BearingSpectrogramDataset(DATA_PATH, window_size=2048, stride=512)
# Shuffled mini-batches of 32 images; this loader is what the training loop iterates.
train_loader = DataLoader(dataset_overlap, batch_size=32, shuffle=True)

# Report the number of images obtained with overlapping windows.
print(f"üöÄ S·ªë l∆∞·ª£ng ·∫£nh sau khi Overlap: {len(dataset_overlap)}")

In [None]:
from collections import Counter

# Count the images per class. `dataset_overlap` is the dataset instance built
# in the preceding cell; `.tolist()` turns the label array into plain ints so
# they can be looked up by class index.
label_counts = Counter(dataset_overlap.labels.tolist())
# Class names in label-index order (same order the dataset uses for labels).
target_folders = ["Normal", "Ball_007", "Ball_014", "Ball_021", "Ball_028"]

print("Ph√¢n b·ªï d·ªØ li·ªáu:")
for i, name in enumerate(target_folders):
    # Counter returns 0 for a class that has no images.
    print(f"- {name}: {label_counts[i]} ·∫£nh")

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class BearingCNN(nn.Module):
    """Small two-stage CNN classifier for single-channel square images.

    Architecture: Conv(1->16, 3x3) + ReLU + 2x2 max-pool, Conv(16->32, 3x3)
    + ReLU + 2x2 max-pool, then two fully connected layers (128 hidden
    units, ``num_classes`` outputs). The two poolings reduce each spatial
    side by a factor of 4, e.g. 64x64 -> 16x16.

    Args:
        num_classes: Number of output logits.
        img_size: Side length of the square input images. It determines the
            input width of the first fully connected layer.
    """

    def __init__(self, num_classes=5, img_size=64):
        super().__init__()
        # Convolutional layer 1: grayscale input (1 channel) -> 16 channels.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        # Convolutional layer 2: 16 channels in, 32 channels out.
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)

        # The padded 3x3 convolutions keep the spatial size, so only the two
        # poolings shrink it: side -> side // 4.
        # With the defaults: 32 channels * 16 * 16 = 8192 features.
        feature_side = img_size // 4
        self.fc1 = nn.Linear(32 * feature_side * feature_side, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: Tensor of shape ``(batch, 1, img_size, img_size)``.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. Keeping the batch dimension fixed means an
        # input of the wrong spatial size fails in fc1 instead of silently
        # changing the batch size.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)


# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Instantiate the network with its default arguments and move it to that device.
model = BearingCNN().to(device)

print(f"‚úÖ ƒê√£ ƒë·ªãnh nghƒ©a v√† kh·ªüi t·∫°o 'model' tr√™n {device} th√†nh c√¥ng!")

In [None]:
import time

# 1. Training configuration
num_epochs = 15
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Move the model to GPU/CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# The message reports the configured epoch count instead of a literal.
print(f"üöÄ B·∫Øt ƒë·∫ßu hu·∫•n luy·ªán {num_epochs} epochs tr√™n thi·∫øt b·ªã: {device}")
start_time = time.time()

for epoch in range(num_epochs):
    model.train()  # switch to training mode
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        batch_size = labels.size(0)

        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so a
        # shorter final batch does not distort the epoch average.
        running_loss += loss.item() * batch_size
        _, predicted = torch.max(outputs.detach(), 1)
        total += batch_size
        correct += (predicted == labels).sum().item()

    # Per-sample averages over the whole epoch.
    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total

    print(f"Epoch [{epoch+1:02d}/{num_epochs}] - Loss: {epoch_loss:.4f} - Accuracy: {epoch_acc:.2f}%")

end_time = time.time()
print(f"\n‚úÖ Ho√†n th√†nh hu·∫•n luy·ªán trong: {(end_time - start_time)/60:.2f} ph√∫t")

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(model, loader, class_names=None):
    """Run ``model`` over ``loader`` and plot the confusion matrix.

    Args:
        model: Classifier returning one row of class scores per sample.
        loader: Iterable of ``(inputs, labels)`` batches.
        class_names: Tick labels in class-index order. Defaults to the
            module-level ``target_folders`` list.

    Returns:
        None. The heatmap is displayed with ``plt.show()``. The model is
        left in evaluation mode.
    """
    if class_names is None:
        class_names = target_folders

    # Run on whatever device the model's parameters live on rather than
    # relying on a global device variable.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in loader:
            outputs = model(inputs.to(run_device))
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Passing the full list of class indices keeps the matrix at
    # len(class_names) x len(class_names) even when a class never occurs,
    # so rows and columns stay aligned with the tick labels.
    class_ids = list(range(len(class_names)))
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('(Predicted)')
    plt.ylabel('(Actual)')
    plt.title('Confusion Matrix')
    plt.show()

# NOTE(review): this evaluates on train_loader, the same loader the training
# loop iterates, so the matrix shows fit on the training data - confirm
# whether a held-out loader was intended.
plot_confusion_matrix(model, train_loader)

In [None]:
# Change this path to point to your FE data folder
DATA_PATH_FE = '/content/drive/My Drive/Bearing/FE_data'

# Report whether the FE folder exists before building the test dataset from it.
if os.path.exists(DATA_PATH_FE):
    print("‚úÖ ƒê√£ t√¨m th·∫•y d·ªØ li·ªáu FE ƒë·ªÉ test!")
else:
    print("‚ùå ƒê∆∞·ªùng d·∫´n FE kh√¥ng ƒë√∫ng, h√£y ki·ªÉm tra l·∫°i!")

In [None]:
# Build the dataset for the FE data
# The same window_size and img_size as in training are used (2048 and 64);
# stride is not passed, so the class default applies.
test_dataset_fe = BearingSpectrogramDataset(DATA_PATH_FE, window_size=2048, img_size=64)
# No shuffling: batches are served in dataset order.
test_loader_fe = DataLoader(test_dataset_fe, batch_size=32, shuffle=False)

# Report the total number of spectrogram images created from the FE data.
print(f"T·ªïng s·ªë ·∫£nh Spectrogram t·∫°o t·ª´ FE: {len(test_dataset_fe)}")

In [None]:
model.eval()  # switch the model to evaluation mode (important!)
correct = 0
total = 0

# Collected across all batches; the confusion-matrix cell reads these lists.
all_preds = []
all_labels = []

with torch.no_grad():  # no gradient tracking, to save memory
    for inputs, labels in test_loader_fe:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # Index of the highest score per sample is the predicted class.
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# An empty loader would otherwise surface as an unexplained ZeroDivisionError.
if total == 0:
    raise ValueError("test_loader_fe yielded no samples; cannot compute FE accuracy")

accuracy_fe = 100 * correct / total
print(f"üìä ƒê·ªô ch√≠nh x√°c (Accuracy) tr√™n t·∫≠p FE: {accuracy_fe:.2f}%")

In [None]:
# Confusion matrix of the FE evaluation collected in all_labels / all_preds.
# Passing every class index keeps the matrix at one row and column per entry
# of target_folders even if a class appears in neither list, so the tick
# labels stay aligned with the cells.
cm_fe = confusion_matrix(all_labels, all_preds,
                         labels=list(range(len(target_folders))))
plt.figure(figsize=(10, 8))
sns.heatmap(cm_fe, annot=True, fmt='d', cmap='Oranges',
            xticklabels=target_folders, yticklabels=target_folders)
# Same axis captions as plot_confusion_matrix: columns are predictions, rows
# are the true classes.
plt.xlabel('(Predicted)')
plt.ylabel('(Actual)')
plt.title('Confusion Matrix - Testing on FE Data')
plt.show()