# Task 2: Recommendation Engine

### Import libraries

In [13]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

### Load the Data

In [14]:
# Read the training listings; the path is relative to the notebook's folder.
df = pd.read_csv("../data/train.csv")
df.head(n=5)

Unnamed: 0,listing_id,title,make,model,description,manufactured,original_reg_date,reg_date,type_of_vehicle,category,...,mileage,omv,arf,opc_scheme,lifespan,eco_category,features,accessories,indicative_price,price
0,1030324,BMW 3 Series 320i Gran Turismo M-Sport,bmw,320i,1 owner! 320i gt m-sports model! big brake kit...,2013.0,,09-dec-2013,luxury sedan,"parf car, premium ad car, low mileage car",...,73000.0,45330.0,50462.0,,,uncategorized,"5 doors gt, powerful and fuel efficient 2.0l t...","bmw i-drive, navigation, bluetooth/aux/usb inp...",,71300.0
1,1021510,Toyota Hiace 3.0M,,hiace,high loan available! low mileage unit. wear an...,2014.0,,26-jan-2015,van,premium ad car,...,110112.0,27502.0,1376.0,,25-jan-2035,uncategorized,low mileage unit. well maintained vehicle. vie...,factory radio setting. front recording camera....,,43800.0
2,1026909,Mercedes-Benz CLA-Class CLA180,mercedes-benz,cla180,1 owner c&c unit. full agent service with 1 mo...,2016.0,,25-jul-2016,luxury sedan,"parf car, premium ad car",...,80000.0,27886.0,26041.0,,,uncategorized,responsive and fuel efficient 1.6l inline 4 cy...,dual electric/memory seats. factory fitted aud...,,95500.0
3,1019371,Mercedes-Benz E-Class E180 Avantgarde,mercedes-benz,e180,"fully agent maintained, 3 years warranty 10 ye...",2019.0,,17-nov-2020,luxury sedan,"parf car, almost new car, consignment car",...,9800.0,46412.0,56977.0,,,uncategorized,"1.5l inline-4 twin scroll turbocharged engine,...",64 colour ambient lighting. active parking ass...,,197900.0
4,1031014,Honda Civic 1.6A VTi,,civic,"kah motor unit! 1 owner, lowest 1.98% for full...",2019.0,,20-sep-2019,mid-sized sedan,parf car,...,40000.0,20072.0,20101.0,,,uncategorized,"1.6l i-vtec engine, 123 bhp, earth dreams cvt ...","s/rims, premium leather seats, factory touch s...",,103200.0


## Approach: Content-based model using item-item similarities

### Preprocessing

In [12]:
# Mapping between listing ids and row positions in `df`.
#
# The indices stored here are used to address rows of the similarity matrices
# and of `df` (via `iloc`), so they must be *row positions*. Enumerating
# `unique()` would shift every index after the first duplicated listing id,
# so the maps are built from the full column instead.
listing_id_to_idx, index_to_listing_id = {}, {}
listing_ids = df["listing_id"].unique()

for idx, listing_id in enumerate(df["listing_id"].to_numpy()):
    index_to_listing_id[idx] = listing_id
    # If a listing id occurs more than once, keep its first row position.
    listing_id_to_idx.setdefault(listing_id, idx)

In [16]:
# Numeric columns used for the numerical similarity; missing values become -1.
# NOTE(review): the values are passed to cosine similarity unscaled, so
# large-magnitude columns (e.g. price, mileage) presumably dominate the
# result -- consider standardising; confirm against the desired behaviour.
numerical_features = [
    "manufactured",
    "mileage",
    "price",
    "curb_weight",
    "power",
    "engine_cap",
    "no_of_owners",
]
cars_num_features = np.array(df.loc[:, numerical_features].fillna(value=-1))

# Free-text columns that are concatenated into one document per listing.
text_features = [
    "transmission",
    "category",
    "description",
    "type_of_vehicle",
    "accessories",
]

def combine_features(row):
    """Join the text-feature values of one listing into a single string.

    Reads the columns named in the module-level ``text_features`` list from
    ``row`` and concatenates them with single spaces, in list order.

    Missing values (NaN / None) are skipped. Converting them with ``str``
    would put the literal token "nan" into the document, which the count
    vectorizer would then treat as a shared word between unrelated listings.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        One listing; must provide every key in ``text_features``.

    Returns
    -------
    str
        The space-separated text, or an empty string if every value is missing.
    """
    values = (row[feature] for feature in text_features)
    return " ".join(str(value) for value in values if pd.notna(value))

# One combined text document per listing, stored as a new column on `df`.
df["combined_features"] = df.apply(combine_features, axis="columns")

# Bag-of-words term counts over the combined documents.
cv = CountVectorizer()
count_matrix = cv.fit_transform(df["combined_features"])

In [17]:
# Pairwise cosine similarities between all listings.

# Similarity on the numerical feature matrix (rows compared against themselves).
num_features_cosine_similarities = cosine_similarity(cars_num_features)

# Similarity on the bag-of-words counts produced by the count vectorizer.
text_features_cosine_similarities = cosine_similarity(count_matrix)

# Weighted sum of both similarity matrices; w1 weights the numerical part,
# w2 the text part.
w1, w2 = 1, 1
combined_cosine_similarities = (
    w1 * num_features_cosine_similarities
    + w2 * text_features_cosine_similarities
)

In [20]:
def get_top_recommendations_item_item(row, **kwargs) -> pd.DataFrame:
    """Return the listings most similar to ``row`` by combined cosine similarity.

    Looks up the row position of ``row["listing_id"]`` in ``listing_id_to_idx``,
    reads that row of ``combined_cosine_similarities`` and returns the rows of
    ``df`` with the highest scores, best match first. The query listing itself
    is never returned.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        The query listing; must provide a ``"listing_id"`` key that is present
        in ``listing_id_to_idx``.
    **kwargs
        k : int, optional
            Number of recommendations to return. Defaults to 10 when omitted
            or ``None``. Values larger than the number of other listings are
            clamped; values <= 0 yield an empty frame.

    Returns
    -------
    pd.DataFrame
        Up to ``k`` rows of ``df``, ordered by descending similarity.

    Raises
    ------
    KeyError
        If ``row["listing_id"]`` is not a key of ``listing_id_to_idx``.
    """
    default_k = 10

    # Only `k` is consumed; other keyword arguments are accepted and ignored.
    k = kwargs.get("k")
    if k is None:
        k = default_k

    listing_index = listing_id_to_idx[row["listing_id"]]

    # Work on a copy: indexing the matrix yields a view, and masking the view
    # would permanently overwrite the shared similarity matrix on every call.
    listing_similarities = np.array(
        combined_cosine_similarities[listing_index], dtype=float
    )

    # Exclude the query listing itself. -inf is used because a fixed value
    # such as -1 is not guaranteed to lie below every weighted similarity.
    listing_similarities[listing_index] = -np.inf

    # argpartition raises if asked for more elements than are available.
    k = min(int(k), listing_similarities.size - 1)
    if k <= 0:
        return df.iloc[[]]

    # Select the k largest scores (unordered), then order them descending.
    top_k = np.argpartition(listing_similarities, -k)[-k:]
    top_k_sorted = top_k[np.argsort(listing_similarities[top_k])[::-1]]

    return df.iloc[top_k_sorted]

### Testing

In [28]:
# Pick one listing as the query and choose how many recommendations to show.
row_id, k = 5, 5
row = df.iloc[row_id]

# Show the query listing as a one-row table.
pd.DataFrame([row])

Unnamed: 0,listing_id,title,make,model,description,manufactured,original_reg_date,reg_date,type_of_vehicle,category,...,omv,arf,opc_scheme,lifespan,eco_category,features,accessories,indicative_price,price,combined_features
5,1027957,Mercedes-Benz A-Class A35 AMG 4MATIC Premium Plus,mercedes-benz,a35,one and only a35 with a45 features in sg! bidd...,,25-dec-2020,,sports car,"imported used vehicle, coe car",...,34000.0,,,,uncategorized,powered by a newly developed 2 litre four cyli...,"fitted with amg aero package, amg performance ...",,273800.0,"auto imported used vehicle, coe car one and on..."


In [29]:
# Fetch the recommendations for the query listing and show the first k rows.
df_recommendations = get_top_recommendations_item_item(row, k=k)
df_recommendations.iloc[:k]

Unnamed: 0,listing_id,title,make,model,description,manufactured,original_reg_date,reg_date,type_of_vehicle,category,...,omv,arf,opc_scheme,lifespan,eco_category,features,accessories,indicative_price,price,combined_features
16533,995076,Mercedes-Benz C-Class C63 S AMG,mercedes-benz,amg,unbeatable price with c63 s model! with a bran...,,06-mar-2019,,sports car,"imported used vehicle, coe car, rare & exotic",...,70000.0,,,,uncategorized,powerful 4.0l v8 twin-turbocharged pushing 503...,"19"" amg rims, amg sports seats, amg brakes, am...",,438800.0,"auto imported used vehicle, coe car, rare & ex..."
3872,985686,Mercedes-Benz GLE-Class GLE43 Coupe AMG 4MATIC...,mercedes-benz,amg,unbeatable price with gle43 coupe amg night ed...,,04-jun-2019,,suv,"imported used vehicle, coe car",...,50000.0,,,,uncategorized,3.0l v6 bi-turbo engine at 385bhp/520nm. 9g-tr...,"paddle shift, adaptive damping, power tailgate...",,317800.0,"auto imported used vehicle, coe car unbeatable..."
6398,999458,BMW M Series M4 Convertible Competition Package,bmw,m4,unbeatable price with m4 convertible competiti...,2018.0,18-jun-2018,16-jun-2021,sports car,"imported used vehicle, parf car, almost new car",...,42780.0,51892.0,,,uncategorized,bmw m twin power turbo inline 6 cylinder petro...,"m competition package, original m-bodykits, ha...",,328800.0,"auto imported used vehicle, parf car, almost n..."
300,997405,BMW M Series M4 Coupe Competition Package,bmw,m4,unbeatable price with m4 coupe competition pac...,,25-sep-2019,,sports car,"imported used vehicle, coe car",...,50000.0,,,,uncategorized,bmw m twin power turbo inline 6 cylinder petro...,"m competition package, carbon fiber roof, orig...",,339800.0,"auto imported used vehicle, coe car unbeatable..."
12150,1025352,BMW 5 Series 530i M-Sport,bmw,530i,unbeatable price with bmw 530i m-sport! with a...,,26-sep-2018,,luxury sedan,"imported used vehicle, coe car",...,24000.0,,,,uncategorized,"powerful 2.0l turbo charged engine,8 speed ste...","leather seats, 19"" m sports rims, audio system...",,218800.0,"auto imported used vehicle, coe car unbeatable..."
