# Implementasi NARM untuk pengerjaan tugas akhir

In [None]:
!pip install mlflow
!pip install ijson

Collecting mlflow
  Downloading mlflow-2.22.0-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.22.0 (from mlflow)
  Downloading mlflow_skinny-2.22.0-py3-none-any.whl.metadata (31 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.22.0->mlflow)
  Downloading databricks_sdk-0.52.0-py3-none-any.whl.metadata (39 kB)
Collecting fastapi<1 (from mlflow-skinny==2.22.0->mlflow)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn<1 (from mlflow-skinny==2.22.0->mlflow)
  Downloading uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 k

In [None]:
class objectview:
    """Expose the keys of a (possibly nested) dict as instance attributes."""

    def __init__(self, d):
        # Nested dicts are wrapped recursively so chained attribute access
        # works; every other value is stored untouched.
        attributes = {}
        for name, value in d.items():
            if isinstance(value, dict):
                value = objectview(value)
            attributes[name] = value
        self.__dict__ = attributes

In [None]:
import pandas  as pd
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch
import mlflow
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
import joblib
from sklearn.preprocessing import LabelEncoder
import ast
import gdown
from google.colab import drive
from tqdm import tqdm
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from torch.utils.data import Dataset, DataLoader
import random
from tqdm import tqdm
from collections import Counter, defaultdict
import re
from nltk.tokenize import word_tokenize
import nltk
import warnings
warnings.filterwarnings('ignore')
import os
import logging
logging.getLogger("mlflow").setLevel(logging.ERROR)

In [None]:
# Mount Google Drive at /content/drive so files stored there can be accessed by path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Preprocessing

In [None]:
# Root folder of the project on the mounted Google Drive.
PROJECT_PATH = '/content/drive/My Drive/TA/Implementasi'
# Sub-folders derived from the project root.
DATASETS_PATH = f'{PROJECT_PATH}/datasets'
PROCESSED_DATA_PATH = f'{PROJECT_PATH}/data/processed'

## Download the sequences and label encoder

In [None]:
def download_drive_file(file_id, output_file):
    """Download a Google Drive file by id using gdown (best effort).

    Args:
        file_id: Google Drive file id, inserted into the ``uc?id=`` URL.
        output_file: Local path passed to gdown as the download target.

    Returns:
        bool: True when gdown reported a downloaded file, False otherwise.
        Failures are printed, never raised.
    """
    url = f'https://drive.google.com/uc?id={file_id}'

    try:
        downloaded = gdown.download(url, output_file, quiet=False)
    except Exception as e:
        print(f"An error occurred while downloading the file: {e}")
        return False

    # NOTE(review): some gdown versions signal a failed download by returning
    # None instead of raising, so the return value has to be checked before
    # claiming success.
    if not downloaded:
        print(f"An error occurred while downloading the file: no file was produced for {url}")
        return False

    print(f"File downloaded successfully: {output_file}")
    return True

In [None]:

# download_drive_file('1dMKbq9sawiAVH9Z4Nh2c2ZsZ3nle1-1Q','label_encoder.joblib')

## Load Sequences

In [None]:
# sequences = pd.read_csv('data_joined.json')
# display(sequences.head())
# print(len(sequences))

# Load the label encoder
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts that come from a trusted source.
label_encoder = joblib.load(DATASETS_PATH + '/product_id_encoder.joblib')

In [None]:
import ijson
import json
import decimal

class DecimalEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``decimal.Decimal`` values as floats."""

    def default(self, obj):
        # Everything that is not a Decimal is delegated to the base encoder.
        if not isinstance(obj, decimal.Decimal):
            return super().default(obj)
        return float(obj)


# Location of the joined session JSON that read_sequences('complete') streams.
file_path = PROCESSED_DATA_PATH + '/data_joined.json'

def count_items_in_json(file_path):
    """Count the elements of the top-level JSON array stored at ``file_path``.

    The file is streamed through ijson, so the whole document is never held
    in memory at once.

    Args:
        file_path: Path of a JSON file whose root is an array.

    Returns:
        int: Number of elements in the root array.
    """
    # ijson parses bytes: binary mode avoids the decode/re-encode round trip
    # (and the dependency on the platform default text encoding) of text mode.
    with open(file_path, 'rb') as f:
        return sum(1 for _ in ijson.items(f, 'item'))

def convert_decimal_to_float(obj):
    """Return ``obj`` with every ``decimal.Decimal`` replaced by a float.

    Dicts and lists are rebuilt recursively; any other value (including
    tuples and their contents) is returned unchanged.
    """
    if isinstance(obj, dict):
        return {key: convert_decimal_to_float(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return list(map(convert_decimal_to_float, obj))
    if isinstance(obj, decimal.Decimal):
        return float(obj)
    return obj

def read_sequences(type_):
    """Load the session sequences in one of two formats.

    Args:
        type_: ``'normal'`` reads the ``"sequence"`` column of
            ``sequences.csv`` under ``DATASETS_PATH``; ``'complete'`` streams
            every element of the JSON array stored at ``file_path``.

    Returns:
        A pandas Series for ``'normal'``; for ``'complete'`` a list of the
        parsed elements with all ``decimal.Decimal`` values converted to float.

    Raises:
        ValueError: If ``type_`` is neither ``'normal'`` nor ``'complete'``.
    """
    if type_ == 'normal':
        return pd.read_csv(DATASETS_PATH + '/sequences.csv')["sequence"]

    if type_ == 'complete':
        # ijson parses bytes, so the file is opened in binary mode; text mode
        # forces an extra decode/re-encode pass over a very large file.
        with open(file_path, 'rb') as f:
            return [
                convert_decimal_to_float(obj)
                for obj in tqdm(ijson.items(f, 'item'), desc="Reading data...")
            ]

    raise ValueError("Invalid type. Expected 'normal' or 'complete'.")

# sequences = create_full_sequence(sessions)
# Load the 'complete' variant: every element of the JSON array at file_path.
sequences = read_sequences('complete')


Reading data...: 1182181it [00:34, 34028.46it/s]


Preview the first five sessions and print the total number of sessions.

In [None]:
# Pretty-print the first five entries as JSON, then the total number of entries loaded.
print(json.dumps(sequences[:5], cls=DecimalEncoder, indent=4))
print(len(sequences))

[
    {
        "session_id": 0,
        "sequence": [
            {
                "id": "B0BFDL54Y7",
                "title": "salad in food cutter slicer veg vegetable with wokii potato container garlic for onion dicer butadi blue chopper acrylonitrile <unk> mini",
                "price": 15.99,
                "label": 489206
            },
            {
                "id": "B0BFDR9X13",
                "title": "salad in grey food cutter slicer veg vegetable with wokii potato container garlic for onion dicer butadi green chopper acrylonitrile <unk> mini",
                "price": 14.99,
                "label": 489237
            },
            {
                "id": "B07J4WF8VH",
                "title": "interchangeable salad food stainless cutter set slicer fruits inserts vegetable with potato garlic steel onion vinsani chopper white cheese blade kitchen <unk>",
                "price": 16.99,
                "label": 142933
            },
            {
                "i

## Transform the sequences

In [None]:
# def transform_the_seq(seqs):
#     return [ast.literal_eval(seq) for seq in seqs]

# transformed_seq = transform_the_seq(sequences)

## Split into train, val, and test

In [None]:
# # pick a random element
# transformed_seq = np.array(transformed_seq, dtype=object)  # Ensure it's an array

# train_sequences, test_sequences = train_test_split(transformed_seq, test_size=0.1, random_state=42)
# test_sequences, val_sequences = train_test_split(test_sequences, test_size=0.5, random_state=42)
# print(len(train_sequences), len(val_sequences), len(test_sequences))
# np.random.choice(train_sequences)  # Pick an index  #

In [None]:
# Hold out 10% of the sessions, then split that hold-out in half, giving
# roughly a 90% / 5% / 5% train / validation / test partition.
# random_state is fixed so the partition is reproducible.
train_sequences, test_sequences = train_test_split(sequences, test_size=0.1, random_state=42)
# The first half of the hold-out stays as the test set, the second half becomes validation.
test_sequences, val_sequences = train_test_split(test_sequences, test_size=0.5, random_state=42)
print(len(train_sequences), len(val_sequences), len(test_sequences))
# Show one training session as a sanity check of the record structure.
train_sequences[0]

1063962 59110 59109


{'session_id': 303482,
 'sequence': [{'id': 'B00GMMDUIY',
   'title': 'table small wood xx ikea lack beige coffee',
   'price': 19.93,
   'label': 48292},
  {'id': 'B0822FLX45',
   'title': 'frame rustic songmics high vasagle assembly lbtx brown rectangular and cm narrow steel pub dining table industrial easy black bar kitchen alloy',
   'price': 12.99,
   'label': 199663},
  {'id': 'B07V22FSNQ',
   'title': 'frame rustic vasagle brown and living cm people steel for heavy style dining table industrial room black duty kdtx kitchen alloy <unk> metal',
   'price': 12.99,
   'label': 175537}],
 'length': 3}

# Modelling

### Model and Data Pipeline Definition

In [None]:
import gc
import psutil
import time

# Maximum vocabulary size per text feature. The size includes the reserved
# <unk> entry, so at most (size - 1) distinct tokens are kept.
MAX_VOCABS = {
    "title": 100000,
}

# Download the nltk tokenizers with better error handling
def setup_nltk():
    """Make sure an NLTK punkt tokenizer resource is available locally.

    ``punkt_tab`` is preferred and ``punkt`` is the fallback. Resources that
    are already installed are detected first, so nothing is re-downloaded on
    repeated runs. The function is best effort: it prints the outcome and
    never raises.
    """
    candidates = ('punkt_tab', 'punkt')

    # Look for an installed resource before touching the network.
    for resource in candidates:
        try:
            nltk.data.find(f'tokenizers/{resource}')
        except LookupError:
            continue
        print(f"NLTK {resource} tokenizer already downloaded")
        return

    for resource in candidates:
        try:
            # nltk.download reports failure through its boolean return value
            # (it does not raise by default), so the result must be checked
            # for the fallback to the next candidate to actually happen.
            if nltk.download(resource):
                print(f"Downloaded {resource} tokenizer")
                return
        except Exception as e:
            print(f"Could not download {resource}: {e}")

    print("Failed to download an NLTK punkt tokenizer; word_tokenize will not work")

setup_nltk()

# Module-level state shared by every SessionDataset instance, so datasets
# created later reuse the vocabularies built by an earlier one.
feature_vocabs = defaultdict(dict)  # {feature: {token: index}}
feature_max_lengths = defaultdict(int)  # {feature: max_observed_sequence_length}
# Flipped to True by SessionDataset once the vocabularies have been built.
vocabs_built = False

class SessionDataset(Dataset):
    """Map-style dataset of sessions whose items carry numericalized text features.

    Each element is a dict ``{"session_id": ..., "sequence": [item, ...]}`` where
    every item holds ``"id"`` (the item's ``label``), ``"price"`` (``np.float32``),
    ``"length"`` (the session length) and one ``LongTensor`` per text feature
    listed in ``max_vocabs``.

    Vocabularies and per-feature maximum token lengths live in the module-level
    ``feature_vocabs`` / ``feature_max_lengths`` so that datasets created later
    reuse what an earlier dataset built. Index 0 is used both for ``<unk>``
    and for padding.
    """

    def __init__(self, sequences, dataset_path, max_vocabs=None, build_vocab=False):
        """Numericalize ``sequences``, building the vocabularies first when required.

        Args:
            sequences: Iterable of session dicts with a ``"sequence"`` list of
                item dicts and optional ``"session_id"`` / ``"length"`` keys.
            dataset_path: Path prefix used by ``save``/``load``; currently
                unused here because the caching calls are commented out.
            max_vocabs: ``{feature: max vocabulary size}``; defaults to
                ``MAX_VOCABS``.
            build_vocab: Force (re)building the shared vocabularies from
                ``sequences`` even if they were already built.
        """
        global feature_vocabs, feature_max_lengths, vocabs_built
        super(SessionDataset, self).__init__()

        # # load if it exists
        # if os.path.exists(f"{dataset_path}.pt"):
        #     self.load(dataset_path)
        #     return

        self.sequences = sequences
        self.max_vocabs = max_vocabs if max_vocabs is not None else MAX_VOCABS

        # Build vocabularies if needed
        if build_vocab or not vocabs_built:
            self.build_feature_vocabs(sequences)
            vocabs_built = True

        # Process and transform all sequences
        self.processed_sequences = []
        for sequence in tqdm(sequences, desc="Processing sequences"):
            items = sequence["sequence"]
            # Fall back to the actual item count when the record carries no
            # explicit "length" field instead of failing with a KeyError.
            session_length = sequence.get("length", len(items))

            processed_items = []
            for item in items:
                processed_item = {
                    "id": item.get("label", -1),  # Default to -1 if ID not found
                    "price": np.float32(item.get("price", 0.0)),  # Convert price to float32
                    "length": session_length
                }

                # Numericalize every configured text feature
                for feature in self.max_vocabs.keys():
                    processed_item[feature] = self.numericalize_feature(item.get(feature, ""), feature)

                processed_items.append(processed_item)

            self.processed_sequences.append({
                "session_id": sequence.get("session_id", -1),
                "sequence": processed_items
            })

        # save the dataset
        # self.save(dataset_path)

    def build_feature_vocabs(self, sequences):
        """Populate the shared vocabularies and max token lengths from ``sequences``.

        The features and their size caps come from ``self.max_vocabs``, the
        same mapping ``__init__`` iterates when numericalizing, so every
        feature that is numericalized also has a vocabulary.
        """
        global feature_vocabs, feature_max_lengths
        max_vocabs = self.max_vocabs
        feature_counts = defaultdict(Counter)

        # First pass: compute max sequence lengths and token frequencies
        for session in tqdm(sequences, desc="Building vocabularies"):
            for item in session["sequence"]:  # Include all items in the sequence
                for feature in max_vocabs:
                    tokens = self._tokenize_text(item.get(feature, ""))

                    # Update max sequence length for the feature
                    feature_max_lengths[feature] = max(
                        feature_max_lengths[feature], len(tokens))

                    # Update token frequencies
                    feature_counts[feature].update(tokens)

        # Build vocab for each feature (capped at its configured size)
        for feature, vocab_size in max_vocabs.items():
            max_size = vocab_size - 1  # Reserve index 0 for <unk>
            tokens = [token for token, _ in feature_counts[feature].most_common(max_size)]
            feature_vocabs[feature] = {'<unk>': 0}
            feature_vocabs[feature].update({token: i+1 for i, token in enumerate(tokens)})

    def _tokenize_text(self, text):
        """Lower-case ``text``, strip punctuation and split it into tokens.

        ``None`` yields an empty list; ints and floats are converted to str first.
        """
        if text is None:
            return []

        if isinstance(text, (int, float)):
            text = str(text)

        cleaned_text = re.sub(r'[^\w\s]', '', text.lower().strip())
        return word_tokenize(cleaned_text)

    def numericalize_feature(self, text, feature):
        """Convert ``text`` into a fixed-length ``LongTensor`` of vocabulary indices.

        Unknown tokens map to 0. The result is truncated or right-padded with
        0 to the maximum token length observed for ``feature``.
        """
        global feature_vocabs, feature_max_lengths
        tokens = self._tokenize_text(text)

        # Convert tokens to indices
        indices = [feature_vocabs[feature].get(token, 0) for token in tokens]

        # Truncate/pad to observed max length for this feature
        max_len = feature_max_lengths[feature]
        if len(indices) > max_len:
            indices = indices[:max_len]
        else:
            indices += [0] * (max_len - len(indices))

        return torch.tensor(indices, dtype=torch.long)

    def save(self, path):
        """Write the processed sequences to ``{path}.pt`` in chunks.

        The file holds the number of chunks followed by one ``torch.save``
        record per chunk of 1000 sessions. Memory usage is printed while saving.
        """
        save_path = f"{path}.pt"

        # Save in chunks to reduce memory usage
        chunk_size = 1000  # Adjust based on your dataset size and available memory
        num_chunks = (len(self.processed_sequences) + chunk_size - 1) // chunk_size

        print(f"Starting to save dataset with {len(self.processed_sequences)} sequences in {num_chunks} chunks")
        initial_memory = psutil.Process().memory_info().rss / (1024 * 1024)
        print(f"Initial memory usage: {initial_memory:.2f} MB")

        with open(save_path, 'wb') as f:
            # First save the number of chunks
            torch.save(num_chunks, f)

            # Save each chunk separately with progress bar
            for i in tqdm(range(num_chunks), desc="Saving dataset chunks", unit="chunk"):
                start_idx = i * chunk_size
                end_idx = min((i + 1) * chunk_size, len(self.processed_sequences))
                chunk = self.processed_sequences[start_idx:end_idx]

                # Copy each session with its tensors detached and moved to CPU
                detached_chunk = []
                for seq in chunk:
                    detached_seq = {
                        "session_id": seq["session_id"],
                        "sequence": []
                    }
                    for item in seq["sequence"]:
                        detached_item = {}
                        for key, value in item.items():
                            if isinstance(value, torch.Tensor):
                                detached_item[key] = value.detach().cpu()
                            else:
                                detached_item[key] = value
                        detached_seq["sequence"].append(detached_item)
                    detached_chunk.append(detached_seq)

                # Save chunk and clear from memory
                torch.save(detached_chunk, f)

                # Force garbage collection to free memory
                gc.collect()

                # Report memory usage periodically
                if i % 5 == 0 or i == num_chunks - 1:
                    current_memory = psutil.Process().memory_info().rss / (1024 * 1024)
                    print(f"Memory usage after chunk {i+1}/{num_chunks}: {current_memory:.2f} MB")

            final_memory = psutil.Process().memory_info().rss / (1024 * 1024)
            print(f"Dataset saved in {num_chunks} chunks to {save_path}")
            print(f"Memory change during save: {final_memory - initial_memory:.2f} MB")

    def load(self, path):
        """Read processed sequences from ``{path}.pt`` as written by ``save``.

        On any error the problem is printed and ``processed_sequences`` is
        left empty.
        """
        load_path = f"{path}.pt"
        self.processed_sequences = []

        start_time = time.time()
        initial_memory = psutil.Process().memory_info().rss / (1024 * 1024)
        print(f"Initial memory usage: {initial_memory:.2f} MB")

        try:
            with open(load_path, 'rb') as f:
                # First load the number of chunks
                num_chunks = torch.load(f)
                print(f"Loading dataset with {num_chunks} chunks from {load_path}")

                # Load each chunk with progress bar
                for i in tqdm(range(num_chunks), desc="Loading dataset chunks", unit="chunk"):
                    chunk_start = time.time()
                    # The chunks contain np.float32 prices, which the
                    # weights-only unpickler (the default in recent torch
                    # releases) rejects. Full unpickling can execute arbitrary
                    # code, so only load files written by save() on a
                    # trusted machine.
                    chunk = torch.load(f, weights_only=False)
                    self.processed_sequences.extend(chunk)

                    # Force garbage collection after each chunk
                    gc.collect()

                    # Report memory and time usage periodically
                    if i % 5 == 0 or i == num_chunks - 1:
                        current_memory = psutil.Process().memory_info().rss / (1024 * 1024)
                        chunk_time = time.time() - chunk_start
                        print(f"Chunk {i+1}/{num_chunks} loaded in {chunk_time:.2f}s. Memory usage: {current_memory:.2f} MB")

                total_time = time.time() - start_time
                final_memory = psutil.Process().memory_info().rss / (1024 * 1024)
                print(f"Dataset loaded successfully from {load_path} ({len(self.processed_sequences)} sequences)")
                print(f"Total loading time: {total_time:.2f}s")
                print(f"Memory change during load: {final_memory - initial_memory:.2f} MB")
        except Exception as e:
            print(f"Error loading dataset: {e}")
            # Initialize empty list if load fails
            self.processed_sequences = []

    def __len__(self):
        """Number of processed sessions."""
        return len(self.processed_sequences)

    def __getitem__(self, index):
        """Return the processed session at ``index``.

        The result is ``{"session_id": ..., "sequence": [item, ...]}``; each
        item has ``"id"``, ``"price"``, ``"length"`` and one tensor per text
        feature.
        """
        return self.processed_sequences[index]

def collate_fn(batch):
    """
    Turn a list of sessions into padded next-item-prediction tensors.

    Every session with at least two items contributes one example: all items
    but the last form the input, the last item's id is the target. Shorter
    sessions, and sessions whose title tensors cannot be stacked, are dropped.

    Args:
        batch: List of session dictionaries with a 'sequence' field whose
            items provide "id", "title" and "price".

    Returns:
        tuple: (batched_inputs, batched_targets, sequence_lengths)
            - batched_inputs: dict with "id", "title" and "price" tensors,
              zero-padded along the sequence axis to the longest input
            - batched_targets: long tensor of target item ids
            - sequence_lengths: long tensor of unpadded input lengths
        (None, None, None) is returned when no session is usable.
    """

    def _stack_titles(titles):
        # Stack as-is first; if that fails, coerce non-tensor entries to long
        # tensors and retry. None signals that the session must be dropped.
        try:
            return torch.stack(titles)
        except (RuntimeError, TypeError):
            pass
        try:
            return torch.stack([
                entry if isinstance(entry, torch.Tensor)
                else torch.tensor(entry, dtype=torch.long)
                for entry in titles
            ])
        except (RuntimeError, TypeError):
            return None

    prepared = []
    targets = []
    lengths = []

    for session in batch:
        items = session["sequence"]
        if len(items) <= 1:
            # Need at least one input item plus a target
            continue

        history = items[:-1]
        ids, titles, prices = [], [], []
        for entry in history:
            ids.append(entry["id"])
            titles.append(entry["title"])
            prices.append(entry["price"])

        target = items[-1]["id"]

        id_tensor = torch.tensor(ids, dtype=torch.long)
        title_tensor = _stack_titles(titles)
        if title_tensor is None:
            continue
        # Prices become a column vector: [seq_len, 1]
        price_tensor = torch.tensor(prices, dtype=torch.float).view(-1, 1)

        prepared.append({"id": id_tensor, "title": title_tensor, "price": price_tensor})
        targets.append(target)
        lengths.append(len(history))

    if not prepared:
        return None, None, None

    # Zero-pad every feature along the sequence axis to the longest input
    widest = max(len(example["id"]) for example in prepared)
    for example in prepared:
        for name, tensor in example.items():
            missing = widest - tensor.size(0)
            if missing > 0:
                filler = torch.zeros(missing, *tensor.size()[1:], dtype=tensor.dtype)
                example[name] = torch.cat([tensor, filler], dim=0)

    batched_inputs = {
        name: torch.stack([example[name] for example in prepared])
        for name in prepared[0]
    }
    batched_targets = torch.tensor(targets, dtype=torch.long)
    sequence_lengths = torch.tensor(lengths, dtype=torch.long)

    return batched_inputs, batched_targets, sequence_lengths


class FeatureEmbedding(nn.Module):
    """Fuse an item's title tokens and price into one ``output_size`` vector.

    Title tokens are embedded and mean-pooled over the token axis, the price
    goes through a linear layer, and the concatenation of both is projected
    to ``output_size``.
    """

    def __init__(self, num_embeddings, title_embedding_dim, price_out_dim, output_size):
        super().__init__()
        self.output_size = output_size

        self.title_embedding = nn.Embedding(num_embeddings["title"], title_embedding_dim)
        self.price_layer = nn.Linear(1, price_out_dim)
        # Averages the token embeddings down to a single vector per title
        self.title_pool = nn.AdaptiveAvgPool1d(1)
        # Projects the concatenated [title | price] features
        self.linear = nn.Linear(title_embedding_dim + price_out_dim, output_size)

    def forward(self, title, price):
        """Embed a batch of item features.

        Args:
            title: token indices, viewed here as [batch, seq, title_len].
            price: prices, viewed here as one scalar per (batch, seq) position.

        Returns:
            Tensor of shape [batch, seq, output_size].
        """
        n_sessions, n_items = title.shape[0], title.shape[1]

        # Collapse batch and sequence axes so every item is handled independently
        token_ids = title.view(-1, title.size(-1))   # [batch*seq, title_len]
        price_column = price.view(-1, 1)             # [batch*seq, 1]

        token_vectors = self.title_embedding(token_ids)          # [batch*seq, title_len, emb]
        # The pooling layer works on the last axis, so move tokens there
        pooled_title = self.title_pool(token_vectors.transpose(1, 2)).squeeze(-1)  # [batch*seq, emb]

        price_features = self.price_layer(price_column)          # [batch*seq, price_out]

        fused = self.linear(torch.cat([pooled_title, price_features], dim=-1))

        # Restore the [batch, seq, output_size] layout
        return fused.view(n_sessions, n_items, -1)


class NARM(nn.Module):
    """Neural Attentive Recommendation Machine over item-id sessions.

    A GRU encodes the session; its last hidden state (global context) is
    concatenated with an attention-weighted sum of all GRU outputs (local
    context) and scored against every non-padding item embedding.

    Args:
        n_items: Number of items; one extra embedding row is added for the
            padding index 0.
        hidden_size: GRU hidden size.
        embedding_dim: Item embedding size.
        batch_size: Accepted for interface compatibility; not used.
        num_layers: Number of stacked GRU layers.
        feature_embedding_args: Optional dict with "num_embeddings",
            "title_embedding_dim" and "price_out_dim"; when given, a
            FeatureEmbedding of title/price is added to the item embedding.
    """

    def __init__(self, n_items, hidden_size, embedding_dim, batch_size,
                 num_layers=1, feature_embedding_args=None, **kwargs):
        super().__init__()
        self.n_items = n_items + 1  # +1 for padding index
        self.hidden_size = hidden_size
        self.embedding_dim = embedding_dim
        # Stored because init_hidden needs it to size the hidden state
        self.num_layers = num_layers

        # Device the model is created on; forward() follows the parameters'
        # actual device so a later model.to(...) keeps working.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Embedding layers
        self.embedding = nn.Embedding(self.n_items, embedding_dim, padding_idx=0).to(self.device)

        # Feature embedding
        self.feature_embedding = None
        if feature_embedding_args:
            self.feature_embedding = FeatureEmbedding(
                num_embeddings=feature_embedding_args["num_embeddings"],
                title_embedding_dim=feature_embedding_args["title_embedding_dim"],
                price_out_dim=feature_embedding_args["price_out_dim"],
                output_size=embedding_dim
            ).to(self.device)

        # GRU and attention
        self.gru = nn.GRU(embedding_dim, hidden_size, num_layers, batch_first=True).to(self.device)
        self.attn_W1 = nn.Linear(hidden_size, hidden_size).to(self.device)
        self.attn_W2 = nn.Linear(hidden_size, hidden_size).to(self.device)
        self.attn_v = nn.Linear(hidden_size, 1).to(self.device)

        # Scoring
        self.wh = nn.Linear(2*hidden_size, hidden_size).to(self.device)
        self.wi = nn.Linear(embedding_dim, hidden_size).to(self.device)

        self.dropout = nn.Dropout(0.5).to(self.device)

    def forward(self, inputs, lengths):
        """Score every item as the next click of each session.

        Args:
            inputs: dict with an "id" tensor of item ids (and "title"/"price"
                when feature embeddings are enabled), padded along dim 1.
            lengths: unpadded length of each session.

        Returns:
            Tensor [batch, n_items]; column j scores the embedding row j + 1.
        """
        # Follow the parameters rather than the device cached at construction
        device = self.embedding.weight.device
        inputs = {k: v.to(device) for k, v in inputs.items()}
        lengths = lengths.to(device)

        # Embedding
        item_emb = self.embedding(inputs["id"])

        # Feature fusion
        if self.feature_embedding is not None:
            feat_emb = self.feature_embedding(
                inputs["title"],
                inputs["price"]
            )
            item_emb = item_emb + feat_emb

        padded_len = item_emb.size(1)

        # GRU processing
        packed = pack_padded_sequence(item_emb, lengths.cpu(), batch_first=True, enforce_sorted=False)
        gru_out, hidden = self.gru(packed)
        # total_length keeps the output as wide as the padded input, so the
        # attention mask below always matches even when the batch is padded
        # beyond its longest session.
        gru_out, _ = pad_packed_sequence(gru_out, batch_first=True, total_length=padded_len)

        # Attention mechanism
        h_t = hidden[-1]
        W1_h = self.attn_W1(gru_out)
        W2_ht = self.attn_W2(h_t).unsqueeze(1)
        attn_scores = self.attn_v(torch.tanh(W1_h + W2_ht)).squeeze(2)

        # Mask exactly the positions the GRU skipped (those at or beyond each
        # session's length) so they receive zero attention weight.
        positions = torch.arange(padded_len, device=device).unsqueeze(0)
        is_padding = positions >= lengths.unsqueeze(1)
        attn_scores = attn_scores.masked_fill(is_padding, -1e9)
        attn_weights = F.softmax(attn_scores, dim=1)

        # Context vectors
        c_local = torch.bmm(attn_weights.unsqueeze(1), gru_out).squeeze(1)
        c_global = h_t
        c = torch.cat([c_global, c_local], dim=1)
        c = self.dropout(c)

        # Scoring
        all_items = self.embedding.weight[1:]  # Exclude padding
        scores = torch.matmul(self.wh(c), self.wi(all_items).t())

        return scores

    def init_hidden(self, batch_size):
        """Return a zero hidden state of shape [num_layers, batch_size, hidden_size]."""
        return torch.zeros(self.num_layers, batch_size, self.hidden_size,
                           device=self.embedding.weight.device)


Downloaded punkt_tab tokenizer


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


## Define Training Loop

In [None]:
import mlflow
import mlflow.pytorch
import copy
import json
from datetime import datetime

# Directory on the mounted Google Drive where checkpoints are written;
# created on first use, and an existing directory is not an error.
MODEL_SAVE_PATH = "/content/drive/My Drive/TA/Implementasi/NARM"
os.makedirs(MODEL_SAVE_PATH, exist_ok=True)

def save_checkpoint(model, optimizer, epoch, loss, model_name, metrics=None):
    """Save a training checkpoint to ``MODEL_SAVE_PATH/{model_name}.pt``.

    An existing checkpoint with the same ``model_name`` is overwritten.

    Args:
        model: Module whose ``state_dict`` is stored.
        optimizer: Optimizer whose ``state_dict`` is stored.
        epoch: Epoch number recorded in the checkpoint.
        loss: Loss value recorded in the checkpoint.
        model_name: File name (without extension) of the checkpoint.
        metrics: Optional extra metrics stored under the ``'metrics'`` key.

    Returns:
        str: Path of the written checkpoint file.
    """
    checkpoint_path = os.path.join(MODEL_SAVE_PATH, f"{model_name}.pt")

    # Everything needed to resume training later
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
    }

    # Metrics are only stored when the caller supplied them
    if metrics is not None:
        checkpoint['metrics'] = metrics

    torch.save(checkpoint, checkpoint_path)
    print(f"Model saved to {checkpoint_path}")

    return checkpoint_path

def load_checkpoint(model, optimizer, checkpoint_path):
    """Restore model and optimizer state from a checkpoint file.

    Args:
        model: Module that receives ``checkpoint['model_state_dict']``.
        optimizer: Optimizer that receives ``checkpoint['optimizer_state_dict']``.
        checkpoint_path: Path of a file containing the keys ``'epoch'``,
            ``'model_state_dict'``, ``'optimizer_state_dict'`` and ``'loss'``
            (``'metrics'`` is optional).

    Returns:
        tuple: ``(model, optimizer, epoch, loss, metrics)``; ``metrics`` is
        None when the checkpoint has none.
    """
    # Map the stored tensors onto the device the model currently lives on, so
    # a checkpoint written on a GPU runtime can be restored on a CPU one.
    first_param = next(model.parameters(), None)
    map_location = first_param.device if first_param is not None else None

    checkpoint = torch.load(checkpoint_path, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    loss = checkpoint['loss']

    metrics = checkpoint.get('metrics', None)

    print(f"Loaded checkpoint from epoch {epoch} with loss: {loss:.4f}")

    return model, optimizer, epoch, loss, metrics

def train(model, epochs, train_loader, val_loader=None, checkpoint_path=None, save_every=1,
          model_name="NARM", use_mlflow=True, run_name=None, **kwargs):
    """Train ``model`` with Adam and cross-entropy loss.

    Runs ``epochs`` training epochs. When ``val_loader`` is given, the model is
    validated after every epoch through ``test``, the best weights (by
    validation accuracy) are written to ``<model_name>_best.pt`` and early
    stopping is applied. Progress lines are appended to
    ``<model_name>_training_log.txt`` under ``MODEL_SAVE_PATH`` and, when
    ``use_mlflow`` is True, params, metrics and artifacts are logged to MLflow.

    Training and validation accuracy are both tracked as fractions in [0, 1].

    Args:
        model: Model to train; must expose a ``device`` attribute and be
            callable as ``model(inputs, lengths)``.
        epochs: Number of epochs to run in this call.
        train_loader: Iterable of ``(inputs, targets, lengths)`` batches;
            batches whose first element is None are skipped.
        val_loader: Optional validation loader in the same format.
        checkpoint_path: Optional checkpoint file to resume from; ignored when
            it is not an existing file.
        save_every: A periodic checkpoint is saved every ``save_every`` epochs.
        model_name: Base name for checkpoint, log and history files.
        use_mlflow: Whether to log to MLflow.
        run_name: MLflow run name; defaults to ``<model_name>_<timestamp>``.
        **kwargs: ``learning_rate`` (default 0.001) and ``patience`` (default 5)
            are read here; all kwargs are also logged as MLflow params.

    Returns:
        tuple: ``(model, losses, val_losses)``. ``model`` carries the best
        validation weights when a validation set was used and a best state was
        recorded; ``val_losses`` is None without a validation loader.
    """
    lr = kwargs.get("learning_rate", 0.001)
    patience = kwargs.get("patience", 5)  # For early stopping

    # Setup optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    # Initialize tracking variables
    start_epoch = 0
    losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []
    best_val_acc = 0.0
    best_model_state = None
    patience_counter = 0

    # Load checkpoint if provided. isfile (not exists) so that a directory path
    # is skipped instead of crashing inside torch.load.
    if checkpoint_path and os.path.isfile(checkpoint_path):
        model, optimizer, start_epoch, _, _ = load_checkpoint(model, optimizer, checkpoint_path)
        # NOTE(review): periodic/final checkpoints below store ``epoch + 1`` while the
        # best-model file stores the 0-based ``epoch``, so the resumed epoch label can
        # be off by one depending on which file is loaded — TODO confirm intent.
        start_epoch += 1  # Start from the next epoch

    end_epoch = epochs + start_epoch
    log_path = os.path.join(MODEL_SAVE_PATH, f"{model_name}_training_log.txt")

    # Defaults so the final save below still works if the loop body never runs
    # (e.g. epochs == 0).
    epoch = start_epoch - 1
    avg_epoch_loss = float('inf')

    # Set up MLflow if requested
    if use_mlflow:
        if run_name is None:
            run_name = f"{model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        mlflow.set_experiment(f"NARM_Training_{model_name}")
        mlflow.start_run(run_name=run_name)

        # Log parameters
        mlflow.log_params({
            "learning_rate": lr,
            "model_name": model_name,
            "batch_size": train_loader.batch_size if hasattr(train_loader, 'batch_size') else 'unknown',
            "epochs": epochs,
            "patience": patience,
            **kwargs
        })

    try:
        for epoch in range(start_epoch, end_epoch):
            # Training phase
            model.train()
            epoch_losses = []
            correct = 0
            total = 0

            progress_bar = tqdm(train_loader, total=len(train_loader), desc=f"Epoch {epoch+1}/{end_epoch}")
            for batch in progress_bar:
                # Check if batch contains data
                if batch[0] is None:
                    continue

                inputs, targets, lengths = batch

                # Move data to model's device
                inputs = {k: v.to(model.device) for k, v in inputs.items()} if isinstance(inputs, dict) else inputs.to(model.device)
                targets = targets.to(model.device)
                lengths = lengths.to(model.device)

                # Forward pass
                optimizer.zero_grad()
                scores = model(inputs, lengths)
                loss = criterion(scores, targets)

                # Backward pass
                loss.backward()
                optimizer.step()

                # Track loss and accuracy
                loss_val = loss.item()
                epoch_losses.append(loss_val)

                # Calculate training accuracy
                _, predicted = torch.max(scores.data, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

                progress_bar.set_postfix({'loss': f"{loss_val:.4f}", 'acc': f"{correct/total:.4f}"})

            # Calculate average epoch loss and accuracy
            avg_epoch_loss = sum(epoch_losses) / len(epoch_losses) if epoch_losses else float('inf')
            epoch_accuracy = correct / total if total > 0 else 0
            losses.append(avg_epoch_loss)
            train_accuracies.append(epoch_accuracy)

            # Update learning rate scheduler
            scheduler.step()

            # Validation phase if val_loader provided
            if val_loader:
                res, current_val_loss = test(model, val_loader, log_results=False)
                # test() reports accuracy in percent (0-100); convert back to a
                # fraction so it shares the scale of the training accuracy in the
                # log, the MLflow metrics and the accuracy plot.
                val_acc = res.overall.accuracy / 100.0
                val_losses.append(current_val_loss)
                val_accuracies.append(val_acc)

                # Log training progress
                log_line = (f"Epoch {epoch+1}/{end_epoch}, "
                      f"Train Loss: {avg_epoch_loss:.4f}, "
                      f"Train Acc: {epoch_accuracy:.4f}, "
                      f"Val Loss: {current_val_loss:.4f}, "
                      f"Val Acc: {val_acc:.4f}")
                print("\n" + log_line)

                # Write to log file
                with open(log_path, "a") as f:
                    f.write(log_line + "\n")

                # Log to MLflow
                if use_mlflow:
                    mlflow.log_metrics({
                        "train_loss": avg_epoch_loss,
                        "train_acc": epoch_accuracy,
                        "val_loss": current_val_loss,
                        "val_acc": val_acc,
                    }, step=epoch)

                # Early stopping check
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    best_model_state = copy.deepcopy(model.state_dict())
                    patience_counter = 0

                    # Save best model
                    best_model_path = os.path.join(MODEL_SAVE_PATH, f"{model_name}_best.pt")
                    torch.save({
                        'epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': avg_epoch_loss,
                        'val_loss': current_val_loss,
                        'metrics': {'acc': val_acc}
                    }, best_model_path)

                    if use_mlflow:
                        mlflow.log_artifact(best_model_path)
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        print(f"Early stopping triggered after {epoch+1} epochs")
                        # Restore best model. No best state exists when validation
                        # accuracy never rose above 0; keep the current weights then.
                        if best_model_state is not None:
                            model.load_state_dict(best_model_state)
                        break
            else:
                log_line = f"Epoch {epoch+1}/{end_epoch}, Train Loss: {avg_epoch_loss:.4f}, Train Acc: {epoch_accuracy:.4f}"
                print("\n" + log_line)
                with open(log_path, "a") as f:
                    f.write(log_line + "\n")

                if use_mlflow:
                    mlflow.log_metrics({
                        "train_loss": avg_epoch_loss,
                        "train_acc": epoch_accuracy
                    }, step=epoch)

            # Save checkpoint if needed
            if (epoch + 1) % save_every == 0:
                metrics = {'acc': val_acc} if val_loader else {'train_acc': epoch_accuracy}
                # Separate name so the ``checkpoint_path`` argument is not overwritten
                periodic_checkpoint_path = save_checkpoint(
                    model, optimizer, epoch + 1, avg_epoch_loss, model_name, metrics
                )

                # Log to MLflow
                if use_mlflow:
                    mlflow.log_artifact(periodic_checkpoint_path)

        # Save the final model
        final_checkpoint_path = save_checkpoint(
            model, optimizer, epoch + 1, avg_epoch_loss, f"{model_name}_final"
        )

        # Log final model to MLflow
        if use_mlflow:
            mlflow.pytorch.log_model(model, "final_model")
            mlflow.log_artifact(final_checkpoint_path)

            # Log training history as JSON
            history = {
                "train_losses": losses,
                "val_losses": val_losses if val_loader else [],
                "train_accuracies": train_accuracies,
                "val_accuracies": val_accuracies if val_loader else []
            }
            history_path = os.path.join(MODEL_SAVE_PATH, f"{model_name}_history.json")
            with open(history_path, 'w') as f:
                json.dump(history, f)
            mlflow.log_artifact(history_path)

            # Save training history to CSV
            history_df = pd.DataFrame({
                'epoch': range(1, len(losses)+1),
                'train_loss': losses,
                'val_loss': val_losses if val_loader else [None]*len(losses),
                'train_acc': train_accuracies,
                'val_acc': val_accuracies if val_loader else [None]*len(losses)
            })
            csv_path = os.path.join(MODEL_SAVE_PATH, f"{model_name}_history.csv")
            history_df.to_csv(csv_path, index=False)
            mlflow.log_artifact(csv_path)

            # Plot and save training curves
            plt.figure(figsize=(12, 6))

            # Plot losses
            plt.subplot(1, 2, 1)
            plt.plot(history_df['epoch'], history_df['train_loss'], label='Train Loss')
            if val_loader:
                plt.plot(history_df['epoch'], history_df['val_loss'], label='Val Loss')
            plt.xlabel('Epoch')
            plt.ylabel('Loss')
            plt.title('Training and Validation Loss')
            plt.legend()

            # Plot accuracies
            plt.subplot(1, 2, 2)
            plt.plot(history_df['epoch'], history_df['train_acc'], label='Train Acc')
            if val_loader:
                plt.plot(history_df['epoch'], history_df['val_acc'], label='Val Acc')
            plt.xlabel('Epoch')
            plt.ylabel('Accuracy')
            plt.title('Training and Validation Accuracy')
            plt.legend()

            plt.tight_layout()
            plot_path = os.path.join(MODEL_SAVE_PATH, f"{model_name}_training_curves.png")
            plt.savefig(plot_path)
            plt.close()

            mlflow.log_artifact(plot_path)

    finally:
        # End MLflow run even when training raised
        if use_mlflow:
            mlflow.end_run()

    # If we have a validation set and recorded a best state, return the best model
    if val_loader and best_model_state is not None:
        model.load_state_dict(best_model_state)

    return model, losses, val_losses if val_loader else None

def test(model, test_loader, k=10, log_results=True, validation=True):
    """
    Evaluate the model and compute loss, accuracy and (optionally) top-k metrics.

    Metrics are reported overall and per session-length bucket, based on the
    ``lengths`` tensor of each batch: short (< 4), medium (4 to 9) and
    long (>= 10).

    Args:
        model: The model to test; must expose a ``device`` attribute and be
            callable as ``model(inputs, lengths)``.
        test_loader: DataLoader yielding ``(inputs, targets, lengths)`` batches;
            batches whose first element is None are skipped.
        k: Value for top-k metrics calculation (clamped to the number of scored
            items).
        log_results: Whether to print a summary of the overall results.
        validation: When True (default) only loss and accuracy are computed and
            Hit@k / MRR@k stay 0; pass False to also compute the top-k metrics.

    Returns:
        tuple: ``(results, avg_loss)``. ``results`` is an ``objectview`` with
        attributes ``short``, ``medium``, ``long`` and ``overall``, each exposing
        ``hit_at_k``, ``mrr_at_k`` and ``accuracy`` in percent (0-100).
        ``avg_loss`` is the summed cross-entropy divided by the sample count
        (``inf`` when no sample was evaluated).
    """
    model.eval()
    criterion = nn.CrossEntropyLoss(reduction='sum')  # Sum reduction for accurate loss calculation

    # Define classification threshold
    short = 4
    medium = 10

    bucket_names = ("short", "medium", "long", "overall")
    stats = {
        name: {"hits": 0, "rr_sum": 0.0, "correct": 0, "samples": 0}
        for name in bucket_names
    }
    total_loss = 0

    with torch.no_grad():  # No gradient computation needed for testing
        for batch in tqdm(test_loader, total=len(test_loader), desc="Testing"):
            # Check if batch contains data
            if batch[0] is None:
                continue

            inputs, targets, lengths = batch

            # Move data to model's device
            inputs = {k: v.to(model.device) for k, v in inputs.items()} if isinstance(inputs, dict) else inputs.to(model.device)
            targets = targets.to(model.device)
            lengths = lengths.to(model.device)

            # Forward pass
            scores = model(inputs, lengths)
            total_loss += criterion(scores, targets).item()

            # One boolean mask per bucket; every sample falls in exactly one of
            # short/medium/long and always in "overall".
            bucket_masks = {
                "short": lengths < short,
                "medium": (lengths >= short) & (lengths < medium),
                "long": lengths >= medium,
                "overall": torch.ones_like(lengths, dtype=torch.bool),
            }

            # Top-1 prediction
            _, predicted = torch.max(scores, dim=1)
            is_correct = predicted == targets

            if not validation:
                # Clamp k so that topk cannot fail when fewer than k items are scored
                top_k = min(k, scores.size(1))
                _, topk_indices = torch.topk(scores, k=top_k, dim=1)

                # matches[i, j] is True when the j-th ranked item is the target;
                # top-k indices are unique per row, so at most one match per row.
                matches = topk_indices == targets.unsqueeze(1)
                is_hit = matches.any(dim=1)
                # 1-based rank of the target; the value is irrelevant for rows
                # without a hit because is_hit zeroes their reciprocal rank.
                rank = matches.int().argmax(dim=1) + 1
                reciprocal_rank = is_hit.float() / rank.float()

            for name, mask in bucket_masks.items():
                bucket = stats[name]
                bucket["samples"] += mask.sum().item()
                bucket["correct"] += is_correct[mask].sum().item()
                if not validation:
                    bucket["hits"] += is_hit[mask].sum().item()
                    bucket["rr_sum"] += reciprocal_rank[mask].sum().item()

    def _ratio(numerator, denominator):
        # Empty buckets report 0 instead of raising ZeroDivisionError
        return numerator / denominator if denominator > 0 else 0

    # Calculate average metrics
    total_samples = stats["overall"]["samples"]
    avg_loss = total_loss / total_samples if total_samples > 0 else float('inf')

    fractions = {
        name: {
            "hit_at_k": _ratio(stats[name]["hits"], stats[name]["samples"]),
            "mrr_at_k": _ratio(stats[name]["rr_sum"], stats[name]["samples"]),
            "accuracy": _ratio(stats[name]["correct"], stats[name]["samples"]),
        }
        for name in bucket_names
    }

    # scale all values to 100 and make the results dict viewable as an object
    results = objectview({
        name: {metric: value * 100 for metric, value in metrics.items()}
        for name, metrics in fractions.items()
    })

    if log_results:
        # The printed summary uses the unscaled (fraction) overall values
        accuracy = fractions["overall"]["accuracy"]
        hit_at_k = fractions["overall"]["hit_at_k"]
        mrr_at_k = fractions["overall"]["mrr_at_k"]
        print(f"Test Results:")
        print(f"  Loss: {avg_loss:.4f}")
        print(f"  Accuracy: {accuracy:.4f}")
        if not validation:
            print(f"  Hit@{k}: {hit_at_k:.4f}")
            print(f"  MRR@{k}: {mrr_at_k:.4f}")
        print(f"  Total samples: {total_samples}")

    return results, avg_loss


# Training and Testing

## Model training

In [None]:
# Mini-batch size shared by the train/val/test DataLoaders and passed to the model as `batch_size`.
batch_size = 512

In [None]:
# Root folder for the dataset splits; each split gets its own sub-folder below it.
path = PROJECT_PATH + "/NARM/dataset"

train_dataset = SessionDataset(
    train_sequences,
    max_vocabs=MAX_VOCABS,
    dataset_path=path + "/train",
)
val_dataset = SessionDataset(
    val_sequences,
    max_vocabs=MAX_VOCABS,
    dataset_path=path + "/val",
)
test_dataset = SessionDataset(
    test_sequences,
    max_vocabs=MAX_VOCABS,
    dataset_path=path + "/test",
)



Processing sequences: 100%|██████████| 1063962/1063962 [12:10<00:00, 1455.91it/s]
Processing sequences: 100%|██████████| 59110/59110 [00:38<00:00, 1521.79it/s]
Processing sequences: 100%|██████████| 59109/59109 [00:39<00:00, 1485.22it/s]


In [None]:

# Settings common to all three loaders; only the training split is shuffled.
common_loader_args = dict(batch_size=batch_size, collate_fn=collate_fn)

train_loader = DataLoader(train_dataset, shuffle=True, **common_loader_args)
val_loader = DataLoader(val_dataset, shuffle=False, **common_loader_args)
test_loader = DataLoader(test_dataset, shuffle=False, **common_loader_args)

In [None]:
import joblib

# Load the product-id encoder (presumably a fitted sklearn LabelEncoder — its
# `classes_` array is used below to derive the number of items).
# NOTE(review): joblib.load unpickles the file, so only load encoder files from a trusted source.
label_encoder = joblib.load(DATASETS_PATH + "/product_id_encoder.joblib")

In [None]:
# Settings for the side-feature embeddings (title vocabulary size and output dims).
num_embeddings_feature = dict(
    num_embeddings=dict(title=MAX_VOCABS["title"]),
    title_embedding_dim=16,
    price_out_dim=4,
)

# Model hyper-parameters, wrapped so they can be read with attribute access.
args = objectview(dict(
    n_items=label_encoder.classes_.shape[0],
    hidden_size=32,
    embedding_dim=32,
    n_layers=1,
    batch_size=batch_size,
    num_embeddings_feature=num_embeddings_feature,
))

# The plain `num_embeddings_feature` dict is passed on purpose: objectview
# converts nested dicts into objectview instances, so `args.num_embeddings_feature`
# is no longer a dict.
model = NARM(
    n_items=args.n_items,
    hidden_size=args.hidden_size,
    embedding_dim=args.embedding_dim,
    n_layers=args.n_layers,
    batch_size=args.batch_size,
    num_embeddings_feature=num_embeddings_feature,
)

In [None]:
# `!export ...` only sets the variable inside a throw-away subshell, so the
# notebook kernel (and therefore PyTorch) never sees it. Set it on the kernel
# process instead. It has to be set before the first CUDA call to take effect,
# and it makes kernel launches synchronous (slower), so keep it for debugging.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [None]:

# Train for up to 40 epochs with validation after every epoch.
# NOTE(review): checkpoint_path points at ".../checkpoints", while save_checkpoint
# writes "<model_name>.pt" directly under MODEL_SAVE_PATH — confirm this is the
# intended resume location.
best_model, losses, val_losses = train(
    model=model,
    epochs=40,
    train_loader=train_loader,
    val_loader=val_loader,
    checkpoint_path=MODEL_SAVE_PATH + "/checkpoints",
)

Epoch 1/40: 100%|██████████| 2079/2079 [04:12<00:00,  8.22it/s, loss=12.9372, acc=0.0000]
Testing: 100%|██████████| 116/116 [01:31<00:00,  1.27it/s]



Epoch 1/40, Train Loss: 13.0080, Train Acc: 0.0000, Val Loss: 12.8668, Val Acc: 0.0017
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 2/40: 100%|██████████| 2079/2079 [04:12<00:00,  8.23it/s, loss=12.4429, acc=0.0000]
Testing: 100%|██████████| 116/116 [01:31<00:00,  1.27it/s]



Epoch 2/40, Train Loss: 12.5612, Train Acc: 0.0000, Val Loss: 12.6806, Val Acc: 0.0118
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 3/40: 100%|██████████| 2079/2079 [04:12<00:00,  8.25it/s, loss=12.1125, acc=0.0001]
Testing: 100%|██████████| 116/116 [01:31<00:00,  1.27it/s]



Epoch 3/40, Train Loss: 12.3375, Train Acc: 0.0001, Val Loss: 12.6117, Val Acc: 0.0068
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 4/40: 100%|██████████| 2079/2079 [04:12<00:00,  8.24it/s, loss=12.1637, acc=0.0002]
Testing: 100%|██████████| 116/116 [01:31<00:00,  1.27it/s]



Epoch 4/40, Train Loss: 12.2130, Train Acc: 0.0002, Val Loss: 12.5698, Val Acc: 0.0423
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 5/40: 100%|██████████| 2079/2079 [04:24<00:00,  7.86it/s, loss=12.0893, acc=0.0007]
Testing: 100%|██████████| 116/116 [01:33<00:00,  1.25it/s]



Epoch 5/40, Train Loss: 12.0915, Train Acc: 0.0007, Val Loss: 12.5387, Val Acc: 0.1540
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 6/40: 100%|██████████| 2079/2079 [04:15<00:00,  8.13it/s, loss=12.2383, acc=0.0014]
Testing: 100%|██████████| 116/116 [01:31<00:00,  1.27it/s]



Epoch 6/40, Train Loss: 11.9536, Train Acc: 0.0014, Val Loss: 12.5541, Val Acc: 0.1759
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 7/40: 100%|██████████| 2079/2079 [04:12<00:00,  8.25it/s, loss=11.7223, acc=0.0016]
Testing: 100%|██████████| 116/116 [01:32<00:00,  1.26it/s]



Epoch 7/40, Train Loss: 11.9346, Train Acc: 0.0016, Val Loss: 12.5560, Val Acc: 0.1979
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 8/40: 100%|██████████| 2079/2079 [04:14<00:00,  8.17it/s, loss=11.7277, acc=0.0018]
Testing: 100%|██████████| 116/116 [01:31<00:00,  1.27it/s]



Epoch 8/40, Train Loss: 11.9184, Train Acc: 0.0018, Val Loss: 12.5516, Val Acc: 0.2013
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 9/40: 100%|██████████| 2079/2079 [04:12<00:00,  8.24it/s, loss=11.5841, acc=0.0019]
Testing: 100%|██████████| 116/116 [01:31<00:00,  1.26it/s]



Epoch 9/40, Train Loss: 11.9023, Train Acc: 0.0019, Val Loss: 12.5478, Val Acc: 0.2267
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 10/40: 100%|██████████| 2079/2079 [04:11<00:00,  8.26it/s, loss=12.2636, acc=0.0021]
Testing: 100%|██████████| 116/116 [01:32<00:00,  1.26it/s]



Epoch 10/40, Train Loss: 11.8865, Train Acc: 0.0021, Val Loss: 12.5483, Val Acc: 0.2436
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 11/40: 100%|██████████| 2079/2079 [04:13<00:00,  8.21it/s, loss=11.3012, acc=0.0022]
Testing: 100%|██████████| 116/116 [01:30<00:00,  1.28it/s]



Epoch 11/40, Train Loss: 11.8702, Train Acc: 0.0022, Val Loss: 12.5490, Val Acc: 0.2436
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 12/40: 100%|██████████| 2079/2079 [04:10<00:00,  8.31it/s, loss=11.3239, acc=0.0021]
Testing: 100%|██████████| 116/116 [01:31<00:00,  1.27it/s]



Epoch 12/40, Train Loss: 11.8684, Train Acc: 0.0021, Val Loss: 12.5490, Val Acc: 0.2453
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 13/40: 100%|██████████| 2079/2079 [04:12<00:00,  8.23it/s, loss=11.8000, acc=0.0022]
Testing: 100%|██████████| 116/116 [01:30<00:00,  1.28it/s]



Epoch 13/40, Train Loss: 11.8668, Train Acc: 0.0022, Val Loss: 12.5488, Val Acc: 0.2453
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 14/40: 100%|██████████| 2079/2079 [04:24<00:00,  7.86it/s, loss=11.3685, acc=0.0022]
Testing: 100%|██████████| 116/116 [01:31<00:00,  1.27it/s]



Epoch 14/40, Train Loss: 11.8647, Train Acc: 0.0022, Val Loss: 12.5490, Val Acc: 0.2470
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 15/40: 100%|██████████| 2079/2079 [04:12<00:00,  8.23it/s, loss=12.7302, acc=0.0022]
Testing: 100%|██████████| 116/116 [01:31<00:00,  1.27it/s]



Epoch 15/40, Train Loss: 11.8635, Train Acc: 0.0022, Val Loss: 12.5484, Val Acc: 0.2453
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 16/40: 100%|██████████| 2079/2079 [04:12<00:00,  8.22it/s, loss=11.7902, acc=0.0023]
Testing: 100%|██████████| 116/116 [01:31<00:00,  1.27it/s]



Epoch 16/40, Train Loss: 11.8614, Train Acc: 0.0023, Val Loss: 12.5485, Val Acc: 0.2470
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 17/40: 100%|██████████| 2079/2079 [04:11<00:00,  8.26it/s, loss=11.7014, acc=0.0022]
Testing: 100%|██████████| 116/116 [01:30<00:00,  1.29it/s]



Epoch 17/40, Train Loss: 11.8622, Train Acc: 0.0022, Val Loss: 12.5486, Val Acc: 0.2470
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 18/40: 100%|██████████| 2079/2079 [04:08<00:00,  8.36it/s, loss=11.7667, acc=0.0023]
Testing: 100%|██████████| 116/116 [01:31<00:00,  1.27it/s]



Epoch 18/40, Train Loss: 11.8615, Train Acc: 0.0023, Val Loss: 12.5485, Val Acc: 0.2470
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 19/40: 100%|██████████| 2079/2079 [04:10<00:00,  8.31it/s, loss=11.4386, acc=0.0023]
Testing: 100%|██████████| 116/116 [01:31<00:00,  1.26it/s]



Epoch 19/40, Train Loss: 11.8612, Train Acc: 0.0023, Val Loss: 12.5485, Val Acc: 0.2504
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 20/40: 100%|██████████| 2079/2079 [04:10<00:00,  8.30it/s, loss=11.3474, acc=0.0023]
Testing: 100%|██████████| 116/116 [01:30<00:00,  1.28it/s]



Epoch 20/40, Train Loss: 11.8606, Train Acc: 0.0023, Val Loss: 12.5484, Val Acc: 0.2504
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 21/40: 100%|██████████| 2079/2079 [04:11<00:00,  8.27it/s, loss=12.0587, acc=0.0023]
Testing: 100%|██████████| 116/116 [01:30<00:00,  1.28it/s]



Epoch 21/40, Train Loss: 11.8613, Train Acc: 0.0023, Val Loss: 12.5485, Val Acc: 0.2504
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 22/40: 100%|██████████| 2079/2079 [04:09<00:00,  8.35it/s, loss=11.9349, acc=0.0023]
Testing: 100%|██████████| 116/116 [01:30<00:00,  1.29it/s]



Epoch 22/40, Train Loss: 11.8612, Train Acc: 0.0023, Val Loss: 12.5485, Val Acc: 0.2504
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 23/40: 100%|██████████| 2079/2079 [04:08<00:00,  8.37it/s, loss=11.9278, acc=0.0023]
Testing: 100%|██████████| 116/116 [01:30<00:00,  1.29it/s]



Epoch 23/40, Train Loss: 11.8605, Train Acc: 0.0023, Val Loss: 12.5485, Val Acc: 0.2504
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM.pt


Epoch 24/40: 100%|██████████| 2079/2079 [04:22<00:00,  7.92it/s, loss=11.9159, acc=0.0023]
Testing: 100%|██████████| 116/116 [01:30<00:00,  1.28it/s]



Epoch 24/40, Train Loss: 11.8607, Train Acc: 0.0023, Val Loss: 12.5485, Val Acc: 0.2504
Early stopping triggered after 24 epochs
Model saved to /content/drive/My Drive/Tugas Akhir/NARM/NARM_final.pt


In [None]:
k = 20

# Full evaluation on the held-out split, including the top-k metrics.
res, loss = test(best_model, test_loader, k=k, validation=False, log_results=False)
print()

print(f"Overall Test Results")
print(f"  Loss: {loss:.4f}")
print(f"  Hit@{k}: {res.overall.hit_at_k:.4f}%")
print(f"  MRR@{k}: {res.overall.mrr_at_k:.4f}%")
print(f"  Accuracy: {res.overall.accuracy:.4f}%")

# add detail for every session category
session_reports = (
    ("Short Session:", res.short),
    ("Medium Session:", res.medium),
    ("Long Session:", res.long),
)
for heading, bucket in session_reports:
    print()
    print(heading)
    print(f"  Accuracy: {bucket.accuracy:.4f}%")
    print(f"  Hit@{k}: {bucket.hit_at_k:.4f}%")
    print(f"  MRR@{k}: {bucket.mrr_at_k:.4f}%")

Testing: 100%|██████████| 116/116 [01:39<00:00,  1.16it/s]


Overall Test Results
  Loss: 12.5357
  Hit@20: 1.3517%
  MRR@20: 0.4768%
  Accuracy: 0.2572%

Short Session:
  Accuracy: 0.3128%
  Hit@20: 1.5122%
  MRR@20: 0.5602%

Medium Session:
  Accuracy: 0.1975%
  Hit@20: 1.1703%
  MRR@20: 0.3837%

Long Session:
  Accuracy: 0.0572%
  Hit@20: 0.8295%
  MRR@20: 0.1989%





## Disconnect the runtime so that my bill does not increase

In [None]:
# from google.colab import runtime
# runtime.unassign()