This notebook creates a very simple model for generating outfits. It is only slightly better than a random model. It computes the most popular category combinations in the outfits dataset and sorts the candidate items by the popularity of the category combination each candidate would produce.

This script takes less than 5 minutes to run

# Train - Computation of the category combinations

In [None]:
# Print the wall-clock time at the start of the run; the last cell prints it
# again, so the total run time can be read off from the two timestamps.
import datetime
print(datetime.datetime.now())

In [None]:
import pandas as pd

# Load the outfits dataset. The cells below rely on it having an `outfit_id`
# column and a list-valued `products` column (it is exploded on `products`
# and grouped by `outfit_id`).
full_outfits = pd.read_parquet("../data/manual_outfits.parquet")
# Preview the first rows in the notebook output.
full_outfits.head()

In [None]:
# Product catalogue reduced to a lookup table: indexed by product_id, with
# product_category as its only column.
items_metadata = (
    pd.read_parquet("../data/products.parquet")
    .loc[:, ["product_id", "product_category"]]
    .set_index("product_id")
)
items_metadata.head()

In [None]:
# One row per (outfit, product) pair, annotated with the product's category.
# The inner join drops products that have no entry in items_metadata.
outfits_category = (
    full_outfits
    .explode(column="products")
    .merge(
        items_metadata,
        left_on="products",
        right_on=items_metadata.index,
        how="inner",
    )
    .rename(columns={"product_category": "categories"})
)
outfits_category.head()

In [None]:
# Collapse back to one row per outfit, collecting the categories and the
# product ids of its items into lists.
outfits_category = (
    outfits_category
    .groupby("outfit_id")
    .agg({"categories": list, "products": list})
    .reset_index()
)

# Despite the column name, nothing is sorted here: each outfit's categories are
# turned into a frozenset, i.e. an unordered, duplicate-free and hashable
# signature that can be counted in the next cell.
# Mapping over the single column that is needed avoids building a Series per
# row and also behaves sensibly when the frame is empty.
outfits_category["categories_sorted"] = outfits_category["categories"].map(frozenset)
outfits_category.head()

In [None]:
from collections import Counter, defaultdict

# Number of outfits observed for each distinct set of categories.
categories_sets = Counter(outfits_category["categories_sorted"].tolist())

# The "model" is just the category lookup table plus those counts.
toy_model = dict(
    items_metadata=items_metadata,
    categories_sets=categories_sets,
)

# Predict - Using the category sets to rank the candidates

### Use the file name generated by evaluation/simple_split_dataset.ipynb here

In [None]:
# Parquet file with the incomplete outfits to complete. The name of the
# predictions CSV written at the end of the notebook is derived from this path.
incomplete_outfits_input = "../data/manual_outfits_testinput_1651167032.parquet"

In [None]:
# Load the incomplete outfits. The prediction cell reads the `incomplete_outfit`
# and `candidates` columns, and the output cell additionally needs `outfit_id`.
test_outfits = pd.read_parquet(incomplete_outfits_input)
# Preview the first rows in the notebook output.
test_outfits.head()

In [None]:
def predict(model, incomplete_outfit, candidates):
    """
    Rank the candidates by how popular the completed outfit's category set is.

    This is the core of your model. In our example, we are going to use the toy
    model we built in the first part of this notebook, but you are free to
    create your amazing model and use it here.

    Arguments:
        model: the outfits model, a dict with
            "items_metadata": DataFrame indexed by product_id with a
                "product_category" column
            "categories_sets": mapping from a frozenset of categories to the
                number of outfits that have exactly that set of categories
        incomplete_outfit: a sequence of product_id (list, tuple or numpy
            array) containing the outfit we want to complete
        candidates: a sequence of product_id from which you are tasked to
            select the right product to complete the outfit
    Return
        A list with every candidate product_id, sorted from best to worst
        according to the model. A candidate scores 0 when it, or any item of
        the incomplete outfit, is missing from the metadata. Ties are broken by
        descending product_id.
    """
    category_of = model["items_metadata"]["product_category"]
    set_counts = model["categories_sets"]

    # list(...) matters: columns read from parquet hold numpy arrays, and
    # `array + [candidate]` would be an element-wise addition (silently
    # corrupting integer ids, or raising for string ids), not a concatenation.
    known_items = list(incomplete_outfit)

    # The categories of the incomplete outfit do not depend on the candidate,
    # so look them up once instead of once per candidate.
    try:
        base_categories = frozenset(category_of.loc[known_items].tolist())
    except KeyError:
        # At least one item of the outfit is unknown: nothing can be scored.
        base_categories = None

    scores = []
    for candidate in candidates:
        score = 0
        if base_categories is not None:
            try:
                candidate_categories = category_of.loc[[candidate]].tolist()
            except KeyError:
                # Unknown candidate keeps the neutral score of 0.
                pass
            else:
                completed = base_categories | frozenset(candidate_categories)
                score = set_counts.get(completed, 0)
        scores.append((score, candidate))

    return [
        candidate
        for _, candidate in sorted(scores, reverse=True)
    ]

# Rank the candidates of every incomplete outfit. Iterating over just the two
# columns that are needed avoids building a Series per row, which is what a
# row-wise DataFrame.apply does, and also works when the frame is empty.
test_outfits["predicted_products"] = [
    predict(toy_model, incomplete_outfit, candidates)
    for incomplete_outfit, candidates in zip(
        test_outfits["incomplete_outfit"], test_outfits["candidates"]
    )
]

# The submitted answer is the top-ranked candidate. An outfit without any
# candidate gets None (an empty CSV field) instead of aborting the whole run
# with an IndexError.
test_outfits["predicted_product"] = [
    ranked[0] if ranked else None
    for ranked in test_outfits["predicted_products"]
]
test_outfits.head()

In [None]:
# Derive the predictions file name from the input path by swapping only a
# trailing ".parquet" for "_predictions.csv". A plain str.replace would rewrite
# every occurrence in the path and, when the extension is missing, would leave
# the name unchanged, so the CSV would overwrite the input file.
parquet_suffix = ".parquet"
if incomplete_outfits_input.endswith(parquet_suffix):
    output_stem = incomplete_outfits_input[:-len(parquet_suffix)]
else:
    output_stem = incomplete_outfits_input
output_name = output_stem + "_predictions.csv"

# Only the outfit id and the top-ranked product are written out.
output_columns = ["outfit_id", "predicted_product"]
test_outfits[output_columns].to_csv(output_name, header=True, index=False)

In [None]:
# Print the wall-clock time at the end of the run (compare with the first cell).
print(datetime.datetime.now())