# FastVSS
A Hyperdimensional Computing search-engine model that receives a text query from the user and retrieves multiple correlated items with time-efficient, competitive performance.


## Initialization

In [1]:
from fastvss import FastVSS
import torch
import random
import os
import numpy as np
import pandas as pd
import polars as pl
import warnings

# Pin every random-number source to one seed so repeated runs are comparable.
SEED = 42
# NOTE(review): hash randomization is fixed at interpreter start-up, so this
# presumably only affects processes launched after this point — confirm.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# cuDNN switches: backend off, deterministic mode on, benchmark mode off.
torch.backends.cudnn.enabled = False
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# The model is pinned to the CPU; automatic GPU selection is left disabled:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device('cpu')

# Load the three tab-separated tables (product, query, label) in that order.
pdf, qdf, ldf = (
    pd.read_csv(path, sep='\t')
    for path in ('WANDS/product.csv', 'WANDS/query.csv', 'WANDS/label.csv')
)

# Build the search model from the loaded tables and the pretrained artefacts.
model = FastVSS(
    product_df=pdf,
    query_df=qdf,
    label_df=ldf,
    n_dimensions=1000,
    pretrain_w2v='hyper_w2v_1000.model',
    pretrain_pvs='hyper_pvs_1000.pt',
    device=device,
    verbose=True,
)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Yosef\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


2024-08-15 11:41:16.002074|STATUS:Loading Dictionary
2024-08-15 11:41:16.507020|STATUS:Precomputing Product
2024-08-15 11:41:16.671733|STATUS:Preparing WANDS
2024-08-15 11:41:21.177418|STATUS:Building Model Done


## Leave-One-Group-Out Cross Validation
---
Takes a long time (estimated 31 hours).

In [None]:
# Re-seed every random-number source so this cell can be run on its own.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# cuDNN switches: backend off, deterministic mode on, benchmark mode off.
torch.backends.cudnn.enabled = False
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Run the model's validation routine (kept as the cell's final expression).
model.validate()

## Test
---

### Fitting

In [2]:
# Re-seed every random-number source so this cell can be run on its own.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# cuDNN switches: backend off, deterministic mode on, benchmark mode off.
torch.backends.cudnn.enabled = False
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Fit the model (kept as the cell's final expression).
model.fit()

Fitting WANDS: 100%|██████████| 233448/233448 [02:08<00:00, 1817.52it/s]


In [3]:
import time

# Re-seed every random-number source so this cell can be run on its own.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# cuDNN switches: backend off, deterministic mode on, benchmark mode off.
torch.backends.cudnn.enabled = False
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Table style used by polars when it prints a frame.
pl.Config.set_tbl_formatting("UTF8_FULL_CONDENSED")

# Query text, product class passed alongside it, and the requested item count
# ("jumlah" = amount).
# Alternative class value kept from the original cell: qclass = ''
query, qclass, jumlah = 'full length mirror', 'Wall & Accent Mirrors', 50

# Retrieval call stays last so its result is what the cell displays.
model.retrieve(query, qclass, jumlah)

Predicting WANDS: 100%|██████████| 424/424 [00:00<00:00, 1723.41it/s]


 Time taken for this query: 78.125 ms 
 shape: (25, 4)
┌─────────────────────────────────┬───────────────────────┬──────┬──────────┐
│ item                            ┆ group                 ┆ type ┆ score    │
│ ---                             ┆ ---                   ┆ ---  ┆ ---      │
│ str                             ┆ str                   ┆ i64  ┆ f64      │
╞═════════════════════════════════╪═══════════════════════╪══════╪══════════╡
│ belle meade rectangular molded… ┆ Wall & Accent Mirrors ┆ 2    ┆ 0.011423 │
│ lafontaine rustic distressed a… ┆ Wall & Accent Mirrors ┆ 2    ┆ 0.009781 │
│ swenson rustic distressed vani… ┆ Wall & Accent Mirrors ┆ 2    ┆ 0.008312 │
│ twig rustic beveled accent mir… ┆ Wall & Accent Mirrors ┆ 2    ┆ 0.007837 │
│ rustic distressed mirror set    ┆ Wall & Accent Mirrors ┆ 2    ┆ 0.007046 │
│ …                               ┆ …                     ┆ …    ┆ …        │
│ emert beveled distressed accen… ┆ Wall & Accent Mirrors ┆ 2    ┆ 0.003705 │
│ kittery

In [14]:
	
# Re-seed every random-number source so this cell can be run on its own.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# cuDNN switches: backend off, deterministic mode on, benchmark mode off.
torch.backends.cudnn.enabled = False
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Table style used by polars when it prints a frame.
pl.Config.set_tbl_formatting("UTF8_FULL_CONDENSED")

# Query text, product class passed alongside it, and the requested item count
# ("jumlah" = amount).
query, qclass, jumlah = 'ergonomic chair', 'Office Chairs', 50

# Retrieval call stays last so its result is what the cell displays.
model.retrieve(query, qclass, jumlah)

Predicting WANDS: 100%|██████████| 519/519 [00:00<00:00, 1975.81it/s]

 Time taken for this query: 78.125 ms 
 shape: (50, 4)
┌─────────────────────────────────┬───────────────┬──────┬──────────┐
│ item                            ┆ group         ┆ type ┆ score    │
│ ---                             ┆ ---           ┆ ---  ┆ ---      │
│ str                             ┆ str           ┆ i64  ┆ f64      │
╞═════════════════════════════════╪═══════════════╪══════╪══════════╡
│ office chair                    ┆ Office Chairs ┆ 2    ┆ 0.028614 │
│ tristani executive chair        ┆ Office Chairs ┆ 2    ┆ 0.026778 │
│ almida ergonomic executive cha… ┆ Office Chairs ┆ 2    ┆ 0.025265 │
│ lowellville patacas ergonomic … ┆ Office Chairs ┆ 2    ┆ 0.025021 │
│ mcglone ergonomic executive ch… ┆ Office Chairs ┆ 2    ┆ 0.024806 │
│ …                               ┆ …             ┆ …    ┆ …        │
│ dcarlo executive chair          ┆ Office Chairs ┆ 2    ┆ 0.017114 │
│ ageliki ergonomic conference c… ┆ Office Chairs ┆ 2    ┆ 0.017107 │
│ ellender office chair           ┆


