In [67]:
import torch

print("CUDA available:", torch.cuda.is_available())

print("Current device:", torch.cuda.current_device())
print("Device name:", torch.cuda.get_device_name(torch.cuda.current_device()))

CUDA available: True
Current device: 0
Device name: NVIDIA GeForce GTX 1650


In [68]:
import pandas as pd
import numpy as np
import re

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [69]:
data=pd.read_excel('dataset0828.xlsx')

In [70]:
# 데이터 포인트 수 계산
total_data_points = len(data)

# 0.1% 기준으로 설정
threshold = total_data_points * 0.001

# Machinery와 Assembly에서 0.1% 이상 등장하는 클래스만 남기기
machinery_counts = data['Machinery'].value_counts()
assembly_counts = data['Assembly'].value_counts()

# 기준을 넘는 클래스만 필터링
filtered_machinery = machinery_counts[machinery_counts >= threshold].index
filtered_assembly = assembly_counts[assembly_counts >= threshold].index

In [71]:
data_filtered = data[data['Machinery'].isin(filtered_machinery) & data['Assembly'].isin(filtered_assembly)]

# 결과 확인
print(f"Filtered data size: {data_filtered.shape[0]}")
print(f"Number of unique Machinery labels: {data_filtered['Machinery'].nunique()}")
print(f"Number of unique Assembly labels: {data_filtered['Assembly'].nunique()}")

Filtered data size: 15162
Number of unique Machinery labels: 68
Number of unique Assembly labels: 256


In [72]:
data_filtered.to_excel("filtered_dataset.xlsx", index=False)

In [133]:
data=pd.read_excel('filtered_dataset.xlsx')

In [134]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15162 entries, 0 to 15161
Data columns (total 32 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   청구서번호        15162 non-null  object 
 1   No.          15162 non-null  int64  
 2   Subject      15152 non-null  object 
 3   Machinery    15162 non-null  object 
 4   Assembly     15162 non-null  object 
 5   청구품목         15162 non-null  object 
 6   Unnamed: 6   0 non-null      float64
 7   Part No.1    15154 non-null  object 
 8   Part No.2    2614 non-null   object 
 9   청구량          15092 non-null  float64
 10  견적           14957 non-null  object 
 11  견적수량         15092 non-null  float64
 12  견적화폐         15092 non-null  object 
 13  견적단가         15162 non-null  float64
 14  발주번호         15162 non-null  object 
 15  발주처          15162 non-null  object 
 16  발주           15162 non-null  object 
 17  발주수량         15092 non-null  float64
 18  발주금액         15092 non-null  float64
 19  D/T 

In [135]:
import re

def preprocess_text(text):
    # 괄호 안의 내용 제거
    text = re.sub(r'\([^)]*\)', '', text)
    # 특수 문자 제거 (알파벳, 숫자, 일부 허용된 특수문자 제외)
    text = re.sub(r'[^\w\s\*\-\+/.,]', '', text)
    # 여러 공백을 언더스코어로 변환
    text = re.sub(r'\s+', '_', text)
    # 텍스트 중간의 연속된 언더스코어를 하나로 줄임
    text = re.sub(r'_+', '_', text)
    # 중간에 언더스코어가 불필요하게 남아있는 경우 처리
    text = re.sub(r'(?<!\w)_(?!\w)', '', text)
    # 언더스코어 앞뒤로 존재하는 특수문자 제거
    text = re.sub(r'_([^\w]+)_', '_', text)
    text = re.sub(r'_([^\w]+)$', '', text)
    text = re.sub(r'^([^\w]+)_', '', text)
    # 텍스트 끝부분의 불필요한 언더스코어 제거
    text = re.sub(r'_+$', '', text)
    # 영어 단어는 소문자로 변환
    text = ' '.join([word.lower() if re.match(r'[A-Za-z]', word) else word for word in text.split()])
    text = text.strip()
    return text

def clean_supplier_name(name):
    # 접미사 제거
    suffixes = r'\b(Corp\.?|Corporation|Company|Co\.?|Incorporated|Inc\.?|Limited|Ltd\.?|GmbH|S\.L\.|SDN\. BHD\.)\b'
    name = re.sub(suffixes, '', name, flags=re.IGNORECASE)
    # 특수 문자 제거
    name = re.sub(r'[^\w\s]', '', name)
    # 불필요한 단어 제거
    name = re.sub(r'\b(사용금지|사)\b', '', name, flags=re.IGNORECASE)
    # 공백 정리
    name = re.sub(r'\s+', ' ', name).strip()
    # 오타 수정 및 문자열 정리
    name = re.sub(r'coporation|coropration|coproration|corporration', 'corporation', name, flags=re.IGNORECASE)
    name = name.lower().strip()
    return name

In [136]:
# 각 칼럼 전처리
data['cleaned_item'] = data['청구품목'].apply(preprocess_text)
data['cleaned_supplier'] = data['발주처'].apply(clean_supplier_name)

# 전처리된 칼럼 결합
data['combined_text'] =data['cleaned_item'].fillna('') + " " + data['cleaned_supplier'].fillna('')


In [137]:
print(data[['combined_text']])

                                     combined_text
0      ge_power_pack_fork_e7 matsuiusa corporation
1      ge_power_pack_fork_e7 matsuiusa corporation
2                  nylon_54_4_1/4,_100md_50fms kti
3                  nylon_48_4_1/4,_100md_50fms kti
4                  nylon_42_4_1/4,_100md_50fms kti
...                                            ...
15157             ring-o haein corporation_cheonan
15158     ring-retaining haein corporation_cheonan
15159     sleeve-bearing haein corporation_cheonan
15160       bearing-ball haein corporation_cheonan
15161    bearing-ball_de haein corporation_cheonan

[15162 rows x 1 columns]


In [138]:
from gensim.models import FastText
import torch


# 문장을 토큰화하여 리스트로 만들어야 합니다.
sentences = [text.split() for text in data['combined_text']]

# Gensim을 사용하여 FastText 모델 학습
model = FastText(vector_size=100, window=3, min_count=1)  # 파라미터를 원하는 대로 조정 가능
model.build_vocab(sentences)  # 어휘 빌드
model.train(sentences, total_examples=len(sentences), epochs=10)  # 모델 학습

(215717, 443560)

In [139]:

# FastText 임베딩 생성
embeddings = []
for text in data['combined_text']:
    # 각 텍스트의 평균 임베딩을 구합니다
    words = text.split()
    word_vectors = [model.wv[word] for word in words if word in model.wv]
    if word_vectors:  # 단어가 모델 어휘에 있는 경우
        embedding = torch.tensor(word_vectors).mean(dim=0)  # 단어 벡터의 평균 계산
    else:
        embedding = torch.zeros(model.vector_size)  # 단어가 없는 경우, 0 벡터로 처리
    embeddings.append(embedding)

# 임베딩 리스트를 텐서로 변환
embeddings_tensor = torch.stack(embeddings)

print(embeddings_tensor.shape)  # 확인용 출력

torch.Size([15162, 100])


In [140]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [141]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# 1. 데이터 준비 및 인코딩
machinery = data['Machinery'].values
assembly = data['Assembly'].values

machinery_encoder = LabelEncoder()
assembly_encoder = LabelEncoder()

machinery_labels = machinery_encoder.fit_transform(machinery)
assembly_labels = assembly_encoder.fit_transform(assembly)

# 2. 임베딩을 numpy 배열로 변환
X = embeddings_tensor.numpy()

# 3. Train-Test Split (각 레이블에 대해 동일한 분할 사용)
X_train_val, X_test, y_train_val_machinery, y_test_machinery, y_train_val_assembly, y_test_assembly = train_test_split(
    X, machinery_labels, assembly_labels, test_size=0.2, random_state=42
)

X_train, X_val, y_train_machinery, y_val_machinery, y_train_assembly, y_val_assembly = train_test_split(
    X_train_val, y_train_val_machinery, y_train_val_assembly, test_size=0.2, random_state=42
)

# 4. 데이터 정규화 (StandardScaler)
scaler = StandardScaler()
X_train_normalized = scaler.fit_transform(X_train)
X_val_normalized = scaler.transform(X_val)  # 검증 데이터 정규화
X_test_normalized = scaler.transform(X_test)  # 테스트 데이터 정규화


# 5. Train 데이터를 torch Tensor로 변환
X_train_tensor = torch.tensor(X_train_normalized, dtype=torch.float32).to(device)
X_val_tensor = torch.tensor(X_val_normalized, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test_normalized, dtype=torch.float32).to(device)

y_train_machinery_tensor = torch.tensor(y_train_machinery, dtype=torch.long).to(device)
y_val_machinery_tensor = torch.tensor(y_val_machinery, dtype=torch.long).to(device)
y_test_machinery_tensor = torch.tensor(y_test_machinery, dtype=torch.long).to(device)

y_train_assembly_tensor = torch.tensor(y_train_assembly, dtype=torch.long).to(device)
y_val_assembly_tensor = torch.tensor(y_val_assembly, dtype=torch.long).to(device)
y_test_assembly_tensor = torch.tensor(y_test_assembly, dtype=torch.long).to(device)


In [142]:
class SharedTransformer(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_heads, num_layers, dropout=0.3):
        super(SharedTransformer, self).__init__()
        self.embedding = nn.Linear(input_dim, hidden_dim)
        transformer_layer = nn.TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=num_heads,
            dropout=dropout,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(transformer_layer, num_layers=num_layers)
        
    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer_encoder(x)
        return x

class MachineryHead(nn.Module):
    def __init__(self, hidden_dim, output_dim):
        super(MachineryHead, self).__init__()
        self.fc = nn.Linear(hidden_dim, output_dim)  # output_dim should be 68
    
    def forward(self, x):
        return self.fc(x)

class AssemblyHead(nn.Module):
    def __init__(self, hidden_dim, machinery_output_dim, output_dim):
        super(AssemblyHead, self).__init__()
        self.fc1 = nn.Linear(hidden_dim, hidden_dim)  # SharedTransformer output processing
        self.fc_reduce = nn.Linear(machinery_output_dim, hidden_dim)  # Reduce machinery output to match hidden_dim
        self.fc2 = nn.Sequential(
            nn.Linear(hidden_dim + hidden_dim, 256),  # combined input size is hidden_dim + hidden_dim
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, output_dim)  # output_dim should be 256
        )
    
    def forward(self, x, machinery_out):
        x = self.fc1(x)  # Processed SharedTransformer output
        machinery_out_reduced = self.fc_reduce(machinery_out)  # Processed MachineryHead output
        combined_input = torch.cat((x, machinery_out_reduced), dim=-1)  # Concatenate outputs
        return self.fc2(combined_input)

In [143]:
hidden_dim = 256
num_heads = 4
num_layers = 2
input_dim = 100  # 예를 들어, 입력 특성 차원

# 각 레이블의 클래스 수에 따라 모델의 출력 차원 정의
machinery_output_dim = 68  # Number of unique Machinery labels
assembly_output_dim = 256  # Number of unique Assembly labels


In [144]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


shared_transformer = SharedTransformer(input_dim=100, hidden_dim=256, num_heads=4, num_layers=2, dropout=0.1).to(device)
print(shared_transformer)

machinery_head = MachineryHead(hidden_dim=256, output_dim=machinery_output_dim).to(device)
print(machinery_head)

assembly_head = AssemblyHead(hidden_dim=256, machinery_output_dim=machinery_output_dim, output_dim=assembly_output_dim).to(device)
print(assembly_head)


SharedTransformer(
  (embedding): Linear(in_features=100, out_features=256, bias=True)
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
        )
        (linear1): Linear(in_features=256, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=256, bias=True)
        (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
)
MachineryHead(
  (fc): Linear(in_features=256, out_features=68, bias=True)
)
AssemblyHead(
  (fc1): Linear(in_features=256, out_features=256, bias=True)
  (fc_reduce): Linear(in_features=68, o

In [None]:
for inputs, labels_machinery, labels_assembly in train_loader:
    inputs = inputs.to(device)
    labels_machinery = labels_machinery.to(device)
    labels_assembly = labels_assembly.to(device)
    
    # Process with the model
    shared_output = shared_transformer(inputs)
    print(f"Shared Output Shape: {shared_output.shape}")
    
    machinery_output = machinery_head(shared_output)
    print(f"Machinery Output Shape: {machinery_output.shape}")
    
    assembly_output = assembly_head(shared_output, machinery_output)
    print(f"Assembly Output Shape: {assembly_output.shape}")
    
    break  # Only checking the shape for the first batch

In [145]:
criterion_machinery = nn.CrossEntropyLoss().to(device)
criterion_assembly = nn.CrossEntropyLoss().to(device)

In [146]:
shared_transformer.to(device)
machinery_head.to(device)
assembly_head.to(device)
optimizer = optim.Adam(list(shared_transformer.parameters()) +
                       list(machinery_head.parameters()) +
                       list(assembly_head.parameters()), lr=0.001)


In [147]:
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)


In [148]:
from torch.utils.data import TensorDataset, DataLoader
# 데이터셋과 데이터로더 정의
train_dataset = TensorDataset(X_train_tensor, y_train_machinery_tensor, y_train_assembly_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_machinery_tensor, y_val_assembly_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_machinery_tensor, y_test_assembly_tensor)

batch_size = 32
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


In [149]:
# 얼리 스토핑 설정
patience = 10  # 검증 손실이 개선되지 않는 에포크 수
best_val_loss = np.inf
epochs_no_improve = 0

num_epochs = 110
for epoch in range(num_epochs):
    # 모델을 학습 모드로 설정
    shared_transformer.train()
    machinery_head.train()
    assembly_head.train()
    
    total_train_loss = 0
    for inputs, labels_machinery, labels_assembly in train_loader:
        inputs = inputs.to(device)
        labels_machinery = labels_machinery.to(device)
        labels_assembly = labels_assembly.to(device)
        
        optimizer.zero_grad()
        
        # 공유된 Transformer 모델로 입력 데이터 처리
        shared_output = shared_transformer(inputs)
        
        # Machinery 예측
        machinery_output = machinery_head(shared_output)
        loss_machinery = criterion_machinery(machinery_output, labels_machinery)
        
        # Assembly 예측 (Machinery 예측 결과 활용)
        assembly_output = assembly_head(shared_output, machinery_output)
        loss_assembly = criterion_assembly(assembly_output, labels_assembly)
        
        # 총 손실 계산 및 역전파
        total_loss = loss_machinery + loss_assembly
        total_loss.backward()
        optimizer.step()
        
        total_train_loss += total_loss.item()
        
    scheduler.step()

    # 평균 학습 손실 계산
    avg_train_loss = total_train_loss / len(train_loader.dataset)
    
     # 모델을 평가 모드로 설정
    shared_transformer.eval()
    machinery_head.eval()
    assembly_head.eval()
    
    total_val_loss = 0
    with torch.no_grad():
        for inputs, labels_machinery, labels_assembly in val_loader:
            inputs = inputs.to(device)
            labels_machinery = labels_machinery.to(device)
            labels_assembly = labels_assembly.to(device)
            
            # 공유된 Transformer 모델로 입력 데이터 처리
            shared_output = shared_transformer(inputs)
            
            # Machinery 예측
            machinery_output = machinery_head(shared_output)
            loss_machinery = criterion_machinery(machinery_output, labels_machinery)
            
            # Assembly 예측 (Machinery 예측 결과 활용)
            assembly_output = assembly_head(shared_output, machinery_output)
            loss_assembly = criterion_assembly(assembly_output, labels_assembly)
            
            # 총 손실 계산
            total_loss = loss_machinery + loss_assembly
            total_val_loss += total_loss.item()
    
    # 평균 검증 손실 계산
    avg_val_loss = total_val_loss / len(val_loader.dataset)
    
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], "
              f"Train Loss: {avg_train_loss:.4f}, "
              f"Val Loss: {avg_val_loss:.4f}")

    # 얼리 스토핑 체크
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        print(f"Validation loss did not improve for {epochs_no_improve} epochs.")
    
    if epochs_no_improve >= patience:
        print(f"Early stopping at epoch {epoch + 1}")
        break

Epoch [10/110], Train Loss: 0.1421, Val Loss: 0.1433
Epoch [20/110], Train Loss: 0.1441, Val Loss: 0.1477
Epoch [30/110], Train Loss: 0.1305, Val Loss: 0.1314
Epoch [40/110], Train Loss: 0.1226, Val Loss: 0.1281
Epoch [50/110], Train Loss: 0.1219, Val Loss: 0.1317
Epoch [60/110], Train Loss: 0.1059, Val Loss: 0.1220
Epoch [70/110], Train Loss: 0.1010, Val Loss: 0.1240
Epoch [80/110], Train Loss: 0.0963, Val Loss: 0.1184
Epoch [90/110], Train Loss: 0.0933, Val Loss: 0.1207
Epoch [100/110], Train Loss: 0.0905, Val Loss: 0.1233
Epoch [110/110], Train Loss: 0.0829, Val Loss: 0.1229


In [150]:
# 학습이 끝난 후 테스트 데이터셋에서 평가
shared_transformer.eval()
machinery_head.eval()
assembly_head.eval()

total_test_loss = 0
all_machinery_preds = []
all_assembly_preds = []
all_machinery_labels = []
all_assembly_labels = []

with torch.no_grad():
    for inputs, labels_machinery, labels_assembly in test_loader:
        inputs = inputs.to(device)
        labels_machinery = labels_machinery.to(device)
        labels_assembly = labels_assembly.to(device)

        # 공유된 Transformer 모델로 입력 데이터 처리
        shared_output = shared_transformer(inputs)

        # Machinery 예측
        machinery_output = machinery_head(shared_output)
        loss_machinery = criterion_machinery(machinery_output, labels_machinery)
        
        # Assembly 예측 (Machinery 예측 결과 활용)
        assembly_output = assembly_head(shared_output, machinery_output)
        loss_assembly = criterion_assembly(assembly_output, labels_assembly)
        
        # 총 손실 계산
        total_loss = loss_machinery + loss_assembly
        total_test_loss += total_loss.item()

        # 예측 결과 저장 (argmax로 클래스 예측)
        machinery_preds = torch.argmax(machinery_output, dim=1)
        assembly_preds = torch.argmax(assembly_output, dim=1)

        all_machinery_preds.extend(machinery_preds.cpu().numpy())
        all_assembly_preds.extend(assembly_preds.cpu().numpy())
        all_machinery_labels.extend(labels_machinery.cpu().numpy())
        all_assembly_labels.extend(labels_assembly.cpu().numpy())

# 평균 테스트 손실 계산
avg_test_loss = total_test_loss / len(test_loader.dataset)
print(f"Test Loss: {avg_test_loss:.4f}")

# 예측값과 실제값 출력 (예를 들어, 처음 10개의 데이터에 대해)
for i in range(10):
    print(f"Sample {i+1}:")
    print(f"  Predicted Machinery: {all_machinery_preds[i]}, Actual Machinery: {all_machinery_labels[i]}")
    print(f"  Predicted Assembly: {all_assembly_preds[i]}, Actual Assembly: {all_assembly_labels[i]}")

Test Loss: 0.1237
Sample 1:
  Predicted Machinery: 30, Actual Machinery: 30
  Predicted Assembly: 122, Actual Assembly: 122
Sample 2:
  Predicted Machinery: 54, Actual Machinery: 54
  Predicted Assembly: 91, Actual Assembly: 91
Sample 3:
  Predicted Machinery: 37, Actual Machinery: 37
  Predicted Assembly: 228, Actual Assembly: 165
Sample 4:
  Predicted Machinery: 7, Actual Machinery: 37
  Predicted Assembly: 233, Actual Assembly: 204
Sample 5:
  Predicted Machinery: 2, Actual Machinery: 2
  Predicted Assembly: 149, Actual Assembly: 153
Sample 6:
  Predicted Machinery: 34, Actual Machinery: 34
  Predicted Assembly: 206, Actual Assembly: 206
Sample 7:
  Predicted Machinery: 38, Actual Machinery: 38
  Predicted Assembly: 29, Actual Assembly: 41
Sample 8:
  Predicted Machinery: 7, Actual Machinery: 7
  Predicted Assembly: 233, Actual Assembly: 211
Sample 9:
  Predicted Machinery: 38, Actual Machinery: 38
  Predicted Assembly: 1, Actual Assembly: 34
Sample 10:
  Predicted Machinery: 30, Ac

In [169]:
user_input_item = "PLUG-DRAIN"
user_input_supplier = "HAEIN Coporation_Cheonan"

# 전처리 적용
cleaned_item = preprocess_text(user_input_item)
cleaned_supplier = clean_supplier_name(user_input_supplier)

# 전처리된 칼럼 결합
combined_text = cleaned_item + " " + cleaned_supplier

# 텍스트를 토큰화
words = combined_text.split()

# 각 단어를 FastText 모델에서 임베딩 벡터로 변환하고, 평균 벡터를 구함
word_vectors = [model.wv[word] for word in words if word in model.wv]
if word_vectors:
    embedding = torch.tensor(word_vectors).mean(dim=0)
else:
    embedding = torch.zeros(model.vector_size)

In [170]:

# 모델을 평가 모드로 설정
shared_transformer.eval()
machinery_head.eval()
assembly_head.eval()

# 예측을 위한 준비된 함수
def predict_top_k(input_embedding, k=5):
    input_tensor = input_embedding.unsqueeze(0).to(device)
    
    with torch.no_grad():
        shared_output = shared_transformer(input_tensor)
        machinery_output = machinery_head(shared_output)
        assembly_output = assembly_head(shared_output, machinery_output)
    
    # 상위 k개의 예측을 추출
    top_k_machinery = torch.topk(machinery_output, k, dim=1)
    top_k_assembly = torch.topk(assembly_output, k, dim=1)
    
    # 인덱스와 해당 확률을 가져옴
    top_k_machinery_indices = top_k_machinery.indices.squeeze(0).cpu().numpy()
    top_k_assembly_indices = top_k_assembly.indices.squeeze(0).cpu().numpy()
    
    return top_k_machinery_indices, top_k_assembly_indices

# 예측 실행
top_k_machinery, top_k_assembly = predict_top_k(embedding, k=5)

# 각 예측의 인덱스를 원래 문자열로 변환
top_k_machinery_labels = machinery_encoder.inverse_transform(top_k_machinery)
top_k_assembly_labels = assembly_encoder.inverse_transform(top_k_assembly)

# 결과 출력
for i in range(5):
    print(f"Rank {i+1}:")
    print(f"  Predicted Machinery: {top_k_machinery_labels[i]}")
    print(f"  Predicted Assembly: {top_k_assembly_labels[i]}")

Rank 1:
  Predicted Machinery: M/E REDUCTION GEAR
  Predicted Assembly: 5H1113 FILTER GP-ENGINE OIL
Rank 2:
  Predicted Machinery: REF SYSTEM
  Predicted Assembly: NMC CONTROLLER
Rank 3:
  Predicted Machinery: BOW THRUSTER TRANSMISSION
  Predicted Assembly: 5N8336 CYLINER HEAD GP
Rank 4:
  Predicted Machinery: NO.3 GENERATOR ENGINE
  Predicted Assembly: PARTS, PERFORMANCE
Rank 5:
  Predicted Machinery: SKIFF BOAT REDUCTION GEAR
  Predicted Assembly: LUBRICATION ASSEMBLY 3800MRHV
