# TF-IDF

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Each string is one instruction step; every step is passed to the vectorizer as its own document.
recipes = [
    'preheat oven to 425 degrees f',
    'press dough into the bottom and sides of a 12 inch pizza pan',
    'bake for 5 minutes until set but not browned',
    'cut sausage into small pieces',
    'whisk eggs and milk in a bowl until frothy',
    'spoon sausage over baked crust and sprinkle with cheese',
    'pour egg mixture slowly over sausage and cheese',
    's& p to taste',
    'bake 15-20 minutes or until eggs are set and crust is brown',
]

# Learn the vocabulary from the steps and build the step-by-term TF-IDF matrix in one call.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(recipes)

# Dense copy so that single weights can be read with plain indexing.
ingredient_weights = tfidf_matrix.toarray()

# Column labels of the matrix, looked up once rather than on every outer iteration.
feature_names = tfidf_vectorizer.get_feature_names_out()

for i, row in enumerate(ingredient_weights):
    print(f"레시피 {i+1}의 재료 비중:")
    for ingredient, weight in zip(feature_names, row):
        # A zero weight means the term does not occur in this step; only present terms are shown.
        if weight > 0:
            print(f"{ingredient}: {weight:.2f}")


레시피 1의 재료 비중:
425: 0.46
degrees: 0.46
oven: 0.46
preheat: 0.46
to: 0.39
레시피 2의 재료 비중:
12: 0.30
and: 0.17
bottom: 0.30
dough: 0.30
inch: 0.30
into: 0.25
of: 0.30
pan: 0.30
pizza: 0.30
press: 0.30
sides: 0.30
the: 0.30
레시피 3의 재료 비중:
bake: 0.33
browned: 0.39
but: 0.39
for: 0.39
minutes: 0.33
not: 0.39
set: 0.33
until: 0.28
레시피 4의 재료 비중:
cut: 0.48
into: 0.41
pieces: 0.48
sausage: 0.36
small: 0.48
레시피 5의 재료 비중:
and: 0.23
bowl: 0.39
eggs: 0.33
frothy: 0.39
in: 0.39
milk: 0.39
until: 0.29
whisk: 0.39
레시피 6의 재료 비중:
and: 0.22
baked: 0.38
cheese: 0.32
crust: 0.32
over: 0.32
sausage: 0.28
spoon: 0.38
sprinkle: 0.38
with: 0.38
레시피 7의 재료 비중:
and: 0.23
cheese: 0.34
egg: 0.40
mixture: 0.40
over: 0.34
pour: 0.40
sausage: 0.29
slowly: 0.40
레시피 8의 재료 비중:
taste: 0.76
to: 0.65
레시피 9의 재료 비중:
15: 0.31
20: 0.31
and: 0.18
are: 0.31
bake: 0.26
brown: 0.31
crust: 0.26
eggs: 0.26
is: 0.31
minutes: 0.26
or: 0.31
set: 0.26
until: 0.23


In [None]:
# Target ingredient vocabulary: the cells below score only these terms.
# Every entry appears exactly once, because scikit-learn raises ValueError when a
# vocabulary list contains a repeated term.
final_class = [
    "sausage", "bacon", "chicken breast", "chicken thigh", "chicken wing", "beef", "pork", "lamb", "rib", "ham",
    "onion", "garlic", "tomato", "corn", "potato", "ginger", "carrot", "pea", "mushroom", "celery", "spinach",
    "zucchini", "broccoli", "cucumber", "cabbage", "pumpkin", "avocado", "green beans", "radish", "asparagus",
    "leek", "cauliflower", "eggplant", "fennel", "lettuce", "chilli", "sweet potato", "lemon", "apple", "orange",
    "lime", "coconut", "pineapple", "banana", "peach", "pear", "apricot", "strawberry", "mango",
    "grapefruit", "watermelon", "plantain", "tangerine", "cream", "chocolate", "yogurt", "sugar", "butter",
    "cheese", "egg", "flour", "milk", "rice", "tofu", "noodle", "oil", "salt"
]

## 각 문장(리스트 항목)별 재료 비중 구하기

In [None]:
# De-duplicate while keeping the declared order. list(set(...)) orders the strings
# differently on each kernel start (string hashing is randomized per process), which
# shuffled the printed ingredients between runs.
final_class = list(dict.fromkeys(final_class))

# Score only the listed ingredients. ngram_range=(1, 2) lets two-word entries such as
# "chicken breast" match; with the default unigram-only analyzer they can never be found.
# N-grams that are not in the fixed vocabulary are ignored.
tfidf_vectorizer = TfidfVectorizer(vocabulary=final_class, ngram_range=(1, 2))
tfidf_matrix = tfidf_vectorizer.fit_transform(recipes)

ingredient_weights = tfidf_matrix.toarray()

# NOTE: matching is on exact tokens, so "eggs" in a step does not count toward "egg".
for i, row in enumerate(ingredient_weights):
    print(f"레시피 {i+1}의 재료 비중:")
    # The vocabulary was passed as a list, so column j of the matrix is final_class[j].
    for ingredient, weight in zip(final_class, row):
        if weight > 0:
            print(f"{ingredient}: {weight:.2f}")

레시피 1의 재료 비중:
레시피 2의 재료 비중:
레시피 3의 재료 비중:
레시피 4의 재료 비중:
sausage: 1.00
레시피 5의 재료 비중:
milk: 1.00
레시피 6의 재료 비중:
sausage: 0.66
cheese: 0.75
레시피 7의 재료 비중:
egg: 0.67
sausage: 0.49
cheese: 0.56
레시피 8의 재료 비중:
레시피 9의 재료 비중:


## 리스트를 하나의 문장으로 합치기

In [None]:
# All instruction steps joined into a single document, so the vectorizer sees one row.
recipes = ['preheat oven to 425 degrees f press dough into the bottom and sides of a 12 inch pizza pan bake for 5 minutes until set but not browned cut sausage into small pieces whisk eggs and milk in a bowl until frothy spoon sausage over baked crust and sprinkle with cheese pour egg mixture slowly over sausage and cheese s& p to taste bake 15-20 minutes or until eggs are set and crust is brown']

# De-duplicate while keeping the declared order. list(set(...)) orders the strings
# differently on each kernel start (string hashing is randomized per process), which
# shuffled the printed ingredients between runs.
final_class = list(dict.fromkeys(final_class))

# Score only the listed ingredients. ngram_range=(1, 2) lets two-word entries such as
# "sweet potato" match; with the default unigram-only analyzer they can never be found.
# N-grams that are not in the fixed vocabulary are ignored.
tfidf_vectorizer = TfidfVectorizer(vocabulary=final_class, ngram_range=(1, 2))
tfidf_matrix = tfidf_vectorizer.fit_transform(recipes)

ingredient_weights = tfidf_matrix.toarray()

# NOTE: matching is on exact tokens, so "eggs" in the text does not count toward "egg".
for row in ingredient_weights:
    print("레시피의 재료 비중:")
    # The vocabulary was passed as a list, so column j of the matrix is final_class[j].
    for ingredient, weight in zip(final_class, row):
        if weight > 0:
            print(f"{ingredient}: {weight:.2f}")


레시피의 재료 비중:
egg: 0.26
sausage: 0.77
milk: 0.26
cheese: 0.52


## 단순 더하기: 문장별 TF-IDF 가중치를 재료별로 합산

In [None]:
# One string per instruction step; each step is its own document for the vectorizer.
recipes = ['preheat oven to 425 degrees f', 'press dough into the bottom and sides of a 12 inch pizza pan', 'bake for 5 minutes until set but not browned', 'cut sausage into small pieces', 'whisk eggs and milk in a bowl until frothy', 'spoon sausage over baked crust and sprinkle with cheese', 'pour egg mixture slowly over sausage and cheese', 's& p to taste', 'bake 15-20 minutes or until eggs are set and crust is brown']

# De-duplicate while keeping the declared order. list(set(...)) orders the strings
# differently on each kernel start (string hashing is randomized per process), which
# shuffled the printed ingredients between runs.
final_class = list(dict.fromkeys(final_class))

# Score only the listed ingredients. ngram_range=(1, 2) lets two-word entries such as
# "green beans" match; with the default unigram-only analyzer they can never be found.
# N-grams that are not in the fixed vocabulary are ignored.
tfidf_vectorizer = TfidfVectorizer(vocabulary=final_class, ngram_range=(1, 2))
tfidf_matrix = tfidf_vectorizer.fit_transform(recipes)

ingredient_weights = tfidf_matrix.toarray()

# Sum each ingredient's weight over all steps. The vocabulary was passed as a list, so
# column j of the matrix is final_class[j] and a column sum replaces the nested loop.
total_ingredient_weights = dict(zip(final_class, ingredient_weights.sum(axis=0)))

print("전체 레시피에서의 재료 비중:")
for ingredient, total_weight in total_ingredient_weights.items():
    # Ingredients that never occur keep a total of 0 and are not shown.
    if total_weight > 0:
        print(f"{ingredient}: {total_weight:.2f}")

전체 레시피에서의 재료 비중:
egg: 0.67
sausage: 2.15
milk: 1.00
cheese: 1.32


# Count Vector

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# One string per instruction step; each step is its own document for the vectorizer.
recipes = ['preheat oven to 425 degrees f', 'press dough into the bottom and sides of a 12 inch pizza pan', 'bake for 5 minutes until set but not browned', 'cut sausage into small pieces', 'whisk eggs and milk in a bowl until frothy', 'spoon sausage over baked crust and sprinkle with cheese', 'pour egg mixture slowly over sausage and cheese', 's& p to taste', 'bake 15-20 minutes or until eggs are set and crust is brown']

# De-duplicate while keeping the declared order. list(set(...)) orders the strings
# differently on each kernel start (string hashing is randomized per process), which
# shuffled the printed ingredients between runs.
final_class = list(dict.fromkeys(final_class))

# Count only the listed ingredients. ngram_range=(1, 2) lets two-word entries such as
# "chicken wing" match; with the default unigram-only analyzer they can never be found.
# N-grams that are not in the fixed vocabulary are ignored.
count_vectorizer = CountVectorizer(vocabulary=final_class, ngram_range=(1, 2))
count_matrix = count_vectorizer.fit_transform(recipes)

ingredient_counts = count_matrix.toarray()

# Total occurrences of each ingredient over all steps. The vocabulary was passed as a list,
# so column j of the matrix is final_class[j] and a column sum replaces the nested loop.
# NOTE: matching is on exact tokens, so "eggs" in a step does not count toward "egg".
total_ingredient_counts = dict(zip(final_class, ingredient_counts.sum(axis=0)))

for ingredient, total_count in total_ingredient_counts.items():
    # Ingredients that never occur keep a count of 0 and are not shown.
    if total_count > 0:
        print(f"{ingredient}: {total_count}")


egg: 1
sausage: 3
milk: 1
cheese: 2
