# Basic Model Training

## Importing

In [1]:
import pandas as pd
import numpy as np

# Load the comma-separated file clean_data.csv into `df`, the frame that the
# following cells derive the feature matrix and the titles from.
df = pd.read_csv(
    'clean_data.csv',
    sep=',',
)

In [2]:
# Split the frame into a title vector and a feature matrix.
# `y` holds the values of the 'name' column.
y = df['name'].values
# `X` holds every remaining column except 'released' and 'name', as a plain array.
X = df.drop(columns=['released', 'name']).values

## Reducing Dimensionality

#### The current data has far too many columns, and many of them are perfectly correlated (correlation of 1.0). We want to reduce the dimensionality so the model is easier to train and more relevant categories stand out.

In [24]:
from sklearn.model_selection import train_test_split 
from wpca import WPCA

# Define feature weights.
# The first three columns get weights 1, 1 and 4, the next 19 columns get 2,
# and every remaining column gets 0.2. The row and column counts are taken
# from X itself instead of being hard-coded, so this cell keeps working when
# the dataset gains or loses rows/columns.
n_samples, n_features = X.shape
leading_weights = [1, 1, 4] + [2] * 19
if n_features < len(leading_weights):
    raise ValueError(
        f"X has {n_features} columns but at least {len(leading_weights)} "
        "are required by the feature weighting scheme"
    )
feature_weights = np.concatenate(
    [leading_weights, np.full(n_features - len(leading_weights), 0.2)]
)
# The same per-feature weight row is repeated once per sample so that `w`
# has the same shape as X.
w = np.tile(feature_weights, (n_samples, 1))

# Create and fit WPCA with feature weights
wpca = WPCA(n_components=2)
wpca.fit(X, weights=w)

# Transform features to lower dimensionality
X_wpca = wpca.transform(X)

# Create a dataframe with the two transformed features, indexed by name.
# Building the index in the constructor avoids mutating the frame afterwards.
X_wpca_df = pd.DataFrame(
    X_wpca,
    columns=['pca1', 'pca2'],
    index=pd.Index(y, name='name'),
)

In [25]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between all rows of the projected features.
# With the second argument omitted, scikit-learn compares the input with itself.
cosine_sim = cosine_similarity(X_wpca_df)

In [26]:
def get_recommendations(title, k, features=None, similarity=None):
    """Return the titles of the ``k`` rows most similar to ``title``.

    Parameters
    ----------
    title
        Label to look up in the index of ``features``.
    k : int
        Maximum number of titles to return. Values less than or equal to
        zero yield an empty list.
    features : pandas.DataFrame, optional
        Frame whose index holds the titles. Defaults to the notebook-level
        ``X_wpca_df``.
    similarity : 2-D array, optional
        Similarity matrix whose row/column ``i`` corresponds to row ``i`` of
        ``features``. Defaults to the notebook-level ``cosine_sim``.

    Returns
    -------
    list
        Up to ``k`` titles ordered from most to least similar. The queried
        title is not filtered out, so it may appear in the result.

    Raises
    ------
    KeyError
        If ``title`` is not present in the index.
    """
    if features is None:
        features = X_wpca_df
    if similarity is None:
        similarity = cosine_sim

    # Without this guard `[-k:]` with k == 0 is `[-0:]`, i.e. the whole array,
    # so asking for zero recommendations would return every title.
    if k <= 0:
        return []

    # Positional lookup that always yields a single integer: when a title is
    # duplicated, Index.get_loc returns a slice or boolean mask instead, which
    # would select several similarity rows. The first occurrence is used.
    matches = np.flatnonzero(features.index == title)
    if matches.size == 0:
        raise KeyError(title)
    idx = matches[0]

    sim_scores = similarity[idx]
    # argsort is ascending: keep the last k positions and reverse them so the
    # highest score comes first.
    sim_indices = sim_scores.argsort()[-k:][::-1]
    return list(features.index[sim_indices])

In [27]:
# Show the 15 titles ranked closest to "Grand Theft Auto V".
get_recommendations(title="Grand Theft Auto V", k=15)

['Grand Theft Auto V',
 'The Last of Us: Left Behind',
 'Final Fantasy IV (1991)',
 'Burnout 3: Takedown',
 'Too Human',
 'Bomberman (1983)',
 'Mutant Year Zero: Road to Eden',
 'Resistance 3',
 "Tony Hawk's Underground",
 'BioShock Infinite: Burial at Sea - Episode Two',
 'LEGO DC Super-Villains',
 'Fire Emblem: The Blazing Blade',
 'Monster Hunter: World',
 'Fallout: New Vegas - Ultimate Edition',
 'OKAMI HD']