In [None]:
from dotenv import load_dotenv
# Load environment variables from a .env file (python-dotenv); nothing in this
# notebook reads them explicitly, so they are presumably consumed by the
# libraries imported further down -- TODO confirm which variables are needed.
load_dotenv()

In [None]:
# Load the translated chicken recipes into a dataframe.
# The path is relative to the notebook's working directory.
import pandas as pd
chickens = pd.read_csv('../csv/translated/chickens_translated_en.csv')
# Last expression of the cell: render the loaded frame
chickens

In [None]:
# Inspect the raw "FlavorProfile" column before any cleaning is applied
chickens["FlavorProfile"]

In [None]:
# Remove every space character from the flavor profiles, e.g. "sweet, sour" -> "sweet,sour".
# Only the literal " " is replaced (tabs/newlines are untouched), and spaces
# *inside* a label are removed as well ("sweet and sour" -> "sweetandsour").
# The column is overwritten in place; re-running the cell is harmless.
chickens["FlavorProfile"] = chickens["FlavorProfile"].str.replace(" ", "")
chickens["FlavorProfile"]

In [None]:
# Inspect additional flavour profiles that were not listed in system_prompt
import re

flavour_profile = chickens["FlavorProfile"]
known_categories = ["sweet", "sour", "salty", "bitter", "umami", "savory", "smoky"]


def _unknown_flavours(profile, known=frozenset(known_categories)):
    """Return the comma-joined labels of ``profile`` that are not in ``known``.

    The profile is split on commas and each label is compared as a whole, so a
    known word that merely occurs *inside* another label (e.g. "sweet" in
    "bittersweet") does not cause that label to be dropped or truncated.
    Non-letter characters around each label are stripped before comparing.
    Non-string values (e.g. NaN for a missing profile) are returned unchanged.
    """
    if not isinstance(profile, str):
        return profile
    labels = (
        re.sub(r'^[^a-zA-Z]+|[^a-zA-Z]+$', '', label)
        for label in profile.split(",")
    )
    return ",".join(label for label in labels if label and label not in known)


filter_flavour_profile = flavour_profile.map(_unknown_flavours)
# Keep only the rows where something other than the known categories remains
filter_flavour_profile = filter_flavour_profile[filter_flavour_profile.str.strip().ne("")]
filter_flavour_profile

In [None]:
# Count how often each leftover flavour combination occurs.
# value_counts() already returns the counts sorted in descending order, so no
# extra sort is needed. Naming the columns explicitly keeps the result
# identical across pandas versions (before 2.0 the counts column was not
# called "count", so sorting by that name raised a KeyError).
count_filtered_flavours = (
    filter_flavour_profile
    .value_counts()
    .rename_axis(filter_flavour_profile.name)
    .reset_index(name="count")
)
count_filtered_flavours

In [None]:
# Load zero-shot-classification model
from transformers import pipeline
# Checkpoint identifier handed to the pipeline below
model = "facebook/bart-large-mnli"
# Build the classifier once; it is reused by every prediction cell that follows.
zero_shot_classifier = pipeline(
    "zero-shot-classification",
    model=model,
    device="cpu"  # inference is pinned to the CPU
)

In [None]:
# Take the first two flavor labels of the first recipe entry (row label 0)
first_entry_flavor_profile = chickens.loc[0, "FlavorProfile"].split(",")[:2]
first_entry_flavor_profile

In [None]:
# Ingredients text of the first recipe entry (row label 0)
first_entry_ingredients = chickens.loc[0, "Ingredients"]
first_entry_ingredients

In [None]:
# Smoke-test the zero-shot classifier on a single recipe: the ingredients text
# is the sequence to classify and the recipe's own flavor labels are the
# candidate labels. The cells below read result["labels"] and result["scores"].
result = zero_shot_classifier(first_entry_ingredients, first_entry_flavor_profile)
result

In [None]:
# Get max score and most dominant flavor profile:
# locate the position of the highest score and read the label at that position.
import numpy as np
dominant_fp = result["labels"][np.argmax(result["scores"])]
dominant_fp

In [None]:
# Helper that predicts the most dominant flavor profile of a single recipe
def predict_most_dominant_fp(recipe):
    """Return the flavor label that the zero-shot classifier scores highest.

    Args:
        recipe: Mapping-like row (e.g. one dataframe row) providing the keys
            "Ingredients" (text to classify) and "FlavorProfile" (candidate
            labels, passed to the classifier as-is).

    Returns:
        The entry of the classifier's "labels" list that sits at the position
        of the maximum of its "scores" list (the first one on ties).
    """
    outcome = zero_shot_classifier(recipe["Ingredients"], recipe["FlavorProfile"])
    labels, scores = outcome["labels"], outcome["scores"]
    return labels[int(np.argmax(scores))]
# Try the helper on the first chicken recipe (row label 0) before applying it
# to the whole frame.
first_entry = chickens.loc[0]
predict_most_dominant_fp(first_entry)

In [None]:
# Customize the progress bar appearance.
# tqdm.pandas(...) is what makes `progress_apply` (used in the next cell)
# available on dataframes; the keyword arguments configure the bar it draws.
from tqdm.notebook import tqdm
tqdm.pandas(
    desc="Predicting Dominant Flavor Profile",  # text shown in front of the bar
    bar_format="{l_bar}{bar:50}{r_bar}",  # bar width set to 50 via the {bar:50} spec
    colour='blue'
)

In [None]:
# Start prediction: run the classifier once per chicken recipe (axis=1 -> one
# call per row) and store the winning label in a new "DominantFlavor" column.
# One model inference per row on CPU, so this cell can take a long time.
chickens['DominantFlavor'] = chickens.progress_apply(predict_most_dominant_fp, axis=1)
chickens

In [None]:
# Finalize data processing: load the remaining translated recipe files.
# Every file follows the "<name>_translated_en.csv" pattern inside `path`,
# and the frames are read in the order in which the names are listed.
path = "../csv/translated"
eggs, fishs, lambs, tofus, soybeans, prawns = (
    pd.read_csv(f"{path}/{name}_translated_en.csv")
    for name in ("eggs", "fishs", "lambs", "tofus", "soybeans", "prawns")
)

In [None]:
# Assign dominant flavor profile to every remaining recipe frame.
# `df_names` and `dfs` must list the same frames in the same order: the name is
# used for the progress-bar label and for the temp file the frame is saved to.
# All frames that go into the final concatenation are covered, so none of them
# ends up without a "DominantFlavor" column.
df_names = ["eggs", "fishs", "lambs", "tofus", "soybeans", "prawns"]
dfs = [eggs, fishs, lambs, tofus, soybeans, prawns]
# zip() silently stops at the shorter list, so fail loudly on a mismatch.
assert len(df_names) == len(dfs), "df_names and dfs must have the same length"
print("Start Assignments ...")
for name, df in zip(df_names, dfs):
    with tqdm(
        total=len(df),
        bar_format="{l_bar}{bar}|{n_fmt}/{total_fmt}[{remaining}]",
        colour="blue",
        desc=f"Processing {name}",
    ) as pbar:
        df["DominantFlavor"] = df.apply(
            # The tuple advances the bar by one row and then yields the
            # prediction; [1] selects the prediction as the apply() result.
            lambda x: (pbar.update(1), predict_most_dominant_fp(x))[1],
            axis=1
        )
        # Checkpoint each frame as soon as it is done so that a later failure
        # does not discard the predictions already computed.
        df.to_csv(f"../csv/temp/final_{name}_temp.csv", index=False)
print("Assignments Completed")

In [None]:
# Combine all Indonesian recipes into a single frame.
# ignore_index=True gives the result a fresh 0..n-1 index; without it every
# source frame contributes its own 0-based labels, so label-based lookups such
# as .loc[0] would return one row per source frame.
indonesian_recipes = pd.concat(
    [chickens, eggs, fishs, lambs, soybeans, prawns, tofus],
    axis=0,
    ignore_index=True,
)
indonesian_recipes

In [None]:
# Save final recipes (row index is not written to the file)
indonesian_recipes.to_csv("../csv/final/indonesian_recipes.csv", index=False)
print("Indonesian Recipes Completed")