In [2]:
# Required Libraries
import pandas as pd
import re
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
import gensim.downloader as api
from fastapi import FastAPI

# Load pre-trained word embeddings (GloVe in this example)
word_vectors = api.load("glove-wiki-gigaword-100")  # Pre-trained GloVe embeddings with 100-dimension vectors

# Step 1: Load the Datasets
df_normal = pd.read_excel(r"C:\Users\VIKRAM\Downloads\NLP DATASET.xlsx")  # Replace with your actual file path
df_diseased = pd.read_excel(r"C:\Users\VIKRAM\Downloads\nlp dieseases dataset.xlsx")  # Replace with your actual file path

# Step 2: Preprocessing Function to clean text
def preprocess_text(text):
    text = re.sub(r'\W+', ' ', text.lower())  # Remove non-alphabetic characters and convert to lowercase
    return text

# Step 3: Combine columns to create 'features' for both datasets
# For Normal dataset
df_normal['features'] = (df_normal['type'] + " " + df_normal['meal type'] + " " + 
                         df_normal['person type'] + " " + df_normal['diet type'])
df_normal['features'] = df_normal['features'].apply(preprocess_text)

# For Diseased dataset
df_diseased['features'] = (df_diseased['type'] + " " + df_diseased['meal type'] + " " + 
                           df_diseased['disease type'])
df_diseased['features'] = df_diseased['features'].apply(preprocess_text)

# Step 4: Initialize the BERT-based NER model using Hugging Face's transformers
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")

# Step 5: Define a function to extract entities using the BERT-based NER
def extract_entities_bert(text):
    ner_results = ner_pipeline(text)
    entities = {
        'meal_type': None,
        'food_type': None,
        'person_type': None,
        'diet_type': None,
        'disease_type': None
    }
    for ent in ner_results:
        entity_label = ent['entity_group']
        if entity_label == "MISC":
            if not entities['diet_type']:
                entities['diet_type'] = ent['word']
            elif not entities['meal_type']:
                entities['meal_type'] = ent['word']
        elif entity_label == "FOOD":
            entities['food_type'] = ent['word']
        elif entity_label == "PER":
            entities['person_type'] = ent['word']
        elif entity_label == "DISEASE":
            entities['disease_type'] = ent['word']
    return entities

# Step 6: Vectorize features in both datasets using TF-IDF with bigrams and trigrams
vectorizer_normal = TfidfVectorizer(ngram_range=(1, 3), stop_words='english')  # Adding bigrams and trigrams
vectorizer_diseased = TfidfVectorizer(ngram_range=(1, 3), stop_words='english')

tfidf_matrix_normal = vectorizer_normal.fit_transform(df_normal['features'])
tfidf_matrix_diseased = vectorizer_diseased.fit_transform(df_diseased['features'])

# Step 7: Function to get word embedding of a sentence
def get_sentence_embedding(text, model):
    words = preprocess_text(text).split()
    embeddings = [model[word] for word in words if word in model]
    if embeddings:
        return np.mean(embeddings, axis=0)
    else:
        return np.zeros(model.vector_size)

# Vectorize both normal and diseased datasets using word embeddings
df_normal['embedding'] = df_normal['features'].apply(lambda x: get_sentence_embedding(x, word_vectors))
df_diseased['embedding'] = df_diseased['features'].apply(lambda x: get_sentence_embedding(x, word_vectors))

# Step 8: Function to recommend food based on extracted entities
def recommend_food_based_on_entities(entities):
    if entities['disease_type']:
        # For diseased diet recommendation
        user_input = f"{entities['food_type']} {entities['meal_type']} {entities['disease_type']}"
        user_embedding = get_sentence_embedding(user_input, word_vectors)
        similarities = cosine_similarity([user_embedding], np.stack(df_diseased['embedding'].values))
        idx = np.argmax(similarities)
        return df_diseased['recommend'].iloc[idx]
    else:
        # For normal diet recommendation
        user_input = f"{entities['food_type']} {entities['meal_type']} {entities['person_type']} {entities['diet_type']}"
        user_embedding = get_sentence_embedding(user_input, word_vectors)
        similarities = cosine_similarity([user_embedding], np.stack(df_normal['embedding'].values))
        idx = np.argmax(similarities)
        return df_normal['recommend'].iloc[idx]

# Step 9: Function to get user input and recommend food
def get_recommendation():
    # Get user input as a prompt
    prompt = input("Please describe your diet preferences: ")

    # Extract entities from the user's prompt using BERT-based NER
    extracted_entities = extract_entities_bert(prompt)
    print(f"Extracted Entities: {extracted_entities}")

    # Recommend food based on the extracted entities
    recommendation = recommend_food_based_on_entities(extracted_entities)
    print("Recommended Food:", recommendation)

# Run the system
get_recommendation()





Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Extracted Entities: {'meal_type': None, 'food_type': None, 'person_type': None, 'diet_type': None, 'disease_type': None}
Recommended Food:  chicken breast,salmon,one egg


In [1]:
# Required Libraries
import pandas as pd
import re
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
import gensim.downloader as api

# Step 1: Load the Datasets (Adjust the paths as per your setup)
df_normal = pd.read_excel(r"C:\Users\VIKRAM\Downloads\NLP DATASET.xlsx")  # Replace with your actual file path
df_diseased = pd.read_excel(r"C:\Users\VIKRAM\Downloads\nlp dieseases dataset.xlsx")  # Replace with your actual file path

# Step 2: Preprocessing Function to clean text
def preprocess_text(text):
    text = re.sub(r'\W+', ' ', text.lower())  # Remove non-alphabetic characters and convert to lowercase
    return text

# Step 3: Combine columns to create 'features' for both datasets
df_normal['features'] = (df_normal['type'] + " " + df_normal['meal type'] + " " + 
                         df_normal['person type'] + " " + df_normal['diet type'])
df_normal['features'] = df_normal['features'].apply(preprocess_text)

df_diseased['features'] = (df_diseased['type'] + " " + df_diseased['meal type'] + " " + 
                           df_diseased['disease type'])
df_diseased['features'] = df_diseased['features'].apply(preprocess_text)

# Step 4: Initialize the BERT-based NER model using Hugging Face's transformers
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")

# Step 5: Custom rule-based entity extraction logic (for known terms)
def rule_based_entity_extraction(text):
    # Define common meal types, food types, person types, and diet types
    meal_types = ['breakfast', 'lunch', 'dinner', 'snack']
    food_types = ['veg', 'non-veg', 'vegetarian', 'meat', 'chicken', 'fish', 'salmon', 'egg']
    person_types = ['child', 'adult', 'elderly', 'senior', 'athlete', 'pregnant']
    diet_types = ['low-carb', 'high-protein', 'low-fat', 'vegan', 'keto', 'paleo']
    disease_types = ['diabetes', 'hypertension', 'obesity', 'heart disease']

    entities = {
        'meal_type': None,
        'food_type': None,
        'person_type': None,
        'diet_type': None,
        'disease_type': None
    }
    
    # Extract meal type
    for meal in meal_types:
        if meal in text.lower():
            entities['meal_type'] = meal
            break
    
    # Extract food type
    for food in food_types:
        if food in text.lower():
            entities['food_type'] = food
            break
    
    # Extract person type
    for person in person_types:
        if person in text.lower():
            entities['person_type'] = person
            break
    
    # Extract diet type
    for diet in diet_types:
        if diet in text.lower():
            entities['diet_type'] = diet
            break
    
    # Extract disease type
    for disease in disease_types:
        if disease in text.lower():
            entities['disease_type'] = disease
            break
    
    return entities

# Step 6: Combine rule-based extraction with BERT-based NER
def extract_entities(text):
    bert_entities = extract_entities_bert(text)  # BERT NER output
    rule_entities = rule_based_entity_extraction(text)  # Rule-based entity extraction
    
    # Combine both results, with rule-based extraction taking precedence if applicable
    entities = {
        'meal_type': rule_entities['meal_type'] or bert_entities['meal_type'],
        'food_type': rule_entities['food_type'] or bert_entities['food_type'],
        'person_type': rule_entities['person_type'] or bert_entities['person_type'],
        'diet_type': rule_entities['diet_type'] or bert_entities['diet_type'],
        'disease_type': rule_entities['disease_type'] or bert_entities['disease_type'],
    }
    
    return entities

# Step 7: Load pre-trained word embeddings (GloVe in this example)
word_vectors = api.load("glove-wiki-gigaword-100")  # Pre-trained GloVe embeddings with 100-dimension vectors

# Step 8: Function to get word embedding of a sentence
def get_sentence_embedding(text, model):
    words = preprocess_text(text).split()
    embeddings = [model[word] for word in words if word in model]
    if embeddings:
        return np.mean(embeddings, axis=0)
    else:
        return np.zeros(model.vector_size)

# Vectorize both normal and diseased datasets using word embeddings
df_normal['embedding'] = df_normal['features'].apply(lambda x: get_sentence_embedding(x, word_vectors))
df_diseased['embedding'] = df_diseased['features'].apply(lambda x: get_sentence_embedding(x, word_vectors))

# Step 9: Function to recommend food based on extracted entities
def recommend_food_based_on_entities(entities):
    if entities['disease_type']:
        # For diseased diet recommendation
        user_input = f"{entities['food_type']} {entities['meal_type']} {entities['disease_type']}"
        user_embedding = get_sentence_embedding(user_input, word_vectors)
        similarities = cosine_similarity([user_embedding], np.stack(df_diseased['embedding'].values))
        idx = np.argmax(similarities)
        return df_diseased['recommend'].iloc[idx]
    else:
        # For normal diet recommendation
        user_input = f"{entities['food_type']} {entities['meal_type']} {entities['person_type']} {entities['diet_type']}"
        user_embedding = get_sentence_embedding(user_input, word_vectors)
        similarities = cosine_similarity([user_embedding], np.stack(df_normal['embedding'].values))
        idx = np.argmax(similarities)
        return df_normal['recommend'].iloc[idx]

# Step 10: Function to get user input and recommend food
def get_recommendation():
    # Get user input as a prompt
    prompt = input("Please describe your diet preferences: ")

    # Extract entities from the user's prompt using both rule-based and BERT NER
    extracted_entities = extract_entities(prompt)
    print(f"Extracted Entities: {extracted_entities}")

    # Recommend food based on the extracted entities
    recommendation = recommend_food_based_on_entities(extracted_entities)
    print("Recommended Food:", recommendation)

# Run the system
get_recommendation()


KeyboardInterrupt: 

In [4]:
import pandas as pd
import re
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from fuzzywuzzy import process

# Load datasets
df_normal = pd.read_excel(r"C:\Users\VIKRAM\Downloads\NLP DATASET.xlsx")
df_diseased = pd.read_excel(r"C:\Users\VIKRAM\Downloads\nlp dieseases dataset.xlsx")

# Preprocess text
def preprocess_text(text):
    text = re.sub(r'\W+', ' ', str(text).lower())
    return text

df_normal['features'] = (df_normal['type'] + " " + df_normal['meal type'] + " " +
                         df_normal['person type'] + " " + df_normal['diet type'])
df_normal['features'] = df_normal['features'].apply(preprocess_text)

df_diseased['features'] = (df_diseased['type'] + " " + df_diseased['meal type'] + " " +
                           df_diseased['disease type'])
df_diseased['features'] = df_diseased['features'].apply(preprocess_text)

# Initialize models
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
nltk.download('wordnet')
nltk.download('omw-1.4')
lemmatizer = WordNetLemmatizer()

# Define terms
meal_types = ['breakfast', 'lunch', 'dinner', 'snack']
food_types = ['veg', 'non-veg', 'vegetarian', 'meat', 'chicken', 'fish', 'salmon', 'egg']
person_types = ['child', 'adult', 'elderly', 'senior', 'athlete', 'pregnant']
diet_types = ['low-carb', 'high-protein', 'low-fat', 'vegan', 'keto', 'paleo']
disease_types = ['diabetes', 'hypertension', 'obesity', 'heart disease']

def expand_terms(terms):
    expanded_terms = set(terms)
    for term in terms:
        for syn in wordnet.synsets(term):
            for lemma in syn.lemmas():
                expanded_terms.add(lemma.name().replace('_', ' '))
    return list(expanded_terms)

meal_types = expand_terms(meal_types)
food_types = expand_terms(food_types)
person_types = expand_terms(person_types)
diet_types = expand_terms(diet_types)
disease_types = expand_terms(disease_types)

# Entity extraction functions
def extract_entities_bert(text):
    ner_results = ner_pipeline(text)
    entities = {
        'meal_type': None,
        'food_type': None,
        'person_type': None,
        'diet_type': None,
        'disease_type': None
    }

    for entity in ner_results:
        word = entity['word'].lower()
        if word in meal_types:
            entities['meal_type'] = word
        elif word in food_types:
            entities['food_type'] = word
        elif word in person_types:
            entities['person_type'] = word
        elif word in diet_types:
            entities['diet_type'] = word
        elif word in disease_types:
            entities['disease_type'] = word
    return entities

def fuzzy_match(word, choices):
    match, score = process.extractOne(word, choices)
    return match if score >= 80 else None

def rule_based_entity_extraction(text):
    entities = {
        'meal_type': None,
        'food_type': None,
        'person_type': None,
        'diet_type': None,
        'disease_type': None
    }

    words = [lemmatizer.lemmatize(word.lower()) for word in text.split()]

    for word in words:
        if not entities['meal_type']:
            entities['meal_type'] = fuzzy_match(word, meal_types)
        if not entities['food_type']:
            entities['food_type'] = fuzzy_match(word, food_types)
        if not entities['person_type']:
            entities['person_type'] = fuzzy_match(word, person_types)
        if not entities['diet_type']:
            entities['diet_type'] = fuzzy_match(word, diet_types)
        if not entities['disease_type']:
            entities['disease_type'] = fuzzy_match(word, disease_types)
    return entities

def extract_entities(text):
    bert_entities = extract_entities_bert(text)
    rule_entities = rule_based_entity_extraction(text)
    entities = {
        'meal_type': rule_entities['meal_type'] or bert_entities['meal_type'],
        'food_type': rule_entities['food_type'] or bert_entities['food_type'],
        'person_type': rule_entities['person_type'] or bert_entities['person_type'],
        'diet_type': rule_entities['diet_type'] or bert_entities['diet_type'],
        'disease_type': rule_entities['disease_type'] or bert_entities['disease_type'],
    }
    return entities

# Compute embeddings
def get_sentence_embedding(text, model):
    return model.encode(text)

df_normal['embedding'] = df_normal['features'].apply(lambda x: get_sentence_embedding(x, sentence_model))
df_diseased['embedding'] = df_diseased['features'].apply(lambda x: get_sentence_embedding(x, sentence_model))

# Recommendation function
def recommend_food_based_on_entities(entities):
    try:
        if entities['disease_type']:
            user_input = f"{entities['food_type']} {entities['meal_type']} {entities['disease_type']}"
            user_embedding = get_sentence_embedding(user_input, sentence_model)
            similarities = cosine_similarity([user_embedding], np.stack(df_diseased['embedding'].values))
            idx = np.argmax(similarities)
            return df_diseased['recommend'].iloc[idx]
        else:
            user_input = f"{entities['food_type']} {entities['meal_type']} {entities['person_type']} {entities['diet_type']}"
            user_embedding = get_sentence_embedding(user_input, sentence_model)
            similarities = cosine_similarity([user_embedding], np.stack(df_normal['embedding'].values))
            idx = np.argmax(similarities)
            return df_normal['recommend'].iloc[idx]
    except Exception as e:
        print(f"Error in recommendation: {e}")
        return None

def get_recommendation():
    prompt = input("Please describe your diet preferences: ")

    extracted_entities = extract_entities(prompt)
    print(f"Extracted Entities: {extracted_entities}")

    recommendation = recommend_food_based_on_entities(extracted_entities)
    if recommendation:
        print("Recommended Food:", recommendation)
    else:
        print("Sorry, no suitable recommendation found. Please provide more details.")

# Run the system
get_recommendation()


Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\VIKRAM\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\VIKRAM\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-d

Extracted Entities: {'meal_type': 'snack', 'food_type': 'crybaby', 'person_type': 'aged', 'diet_type': 'paleo', 'disease_type': 'diabetes'}
Recommended Food: Grilled chicken with mixed vegetable salad.


In [5]:
import requests
import openai

# Set your Edamam API credentials
EDAMAM_APP_ID = '2a98c6a4'  # Replace with your Edamam APP ID
EDAMAM_APP_KEY = '75704984fc122dc3153ae7a943f3cb56'  # Replace with your Edamam APP KEY

# Set your OpenAI API key
openai.api_key = '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'  # Replace with your OpenAI API key

# Function to get nutritional information from Edamam API
def get_nutritional_info(food_item):
    url = f"https://api.edamam.com/api/nutrition-data?app_id=2a98c6a4&app_key=75704984fc122dc3153ae7a943f3cb56&nutrition-type=cooking&ingr={}
"
    params = {
        'app_id': EDAMAM_APP_ID,
        'app_key': EDAMAM_APP_KEY,
        'nutrition-type': 'logging',
        'ingr': food_item
    }
    
    response = requests.get(url, params=params)
    
    if response.status_code == 200:
        return response.json()
    else:
        return None

# Function to format nutritional information for output
def format_nutrition_info(nutrition_data):
    if nutrition_data:
        nutrients = nutrition_data.get('totalNutrients', {})
        formatted_info = []
        for nutrient, details in nutrients.items():
            formatted_info.append(f"{details['label']}: {details['quantity']} {details['unit']}")
        return "\n".join(formatted_info)
    return "Nutritional information not available."

# Function to generate a recipe using OpenAI API
def generate_recipe(food_item):
    prompt = f"Generate a detailed recipe for making {food_item}, including ingredients and step-by-step instructions."
    
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",  # You can choose a different model if needed
        messages=[
            {"role": "user", "content": prompt}
        ]
    )
    
    recipe = response.choices[0].message['content']
    return recipe

# Function to get user input and recommend food
def get_recommendation():
    # Get user input as a prompt
    prompt = input("Please describe your diet preferences: ")

    # Extract entities from the user's prompt using BERT-based NER
    extracted_entities = extract_entities_bert(prompt)
    print(f"Extracted Entities: {extracted_entities}")

    # Recommend food based on the extracted entities
    recommendation = recommend_food_based_on_entities(extracted_entities)
    print("Recommended Food:", recommendation)

    # Get nutritional analysis for the recommended food
    nutrition = get_nutritional_info(recommendation)
    nutritional_analysis = format_nutrition_info(nutrition)
    print("Nutritional Analysis:\n", nutritional_analysis)

    # Generate recipe for the recommended food
    recipe = generate_recipe(recommendation)
    print("Recipe to Prepare:\n", recipe)

# Run the system
get_recommendation()


Extracted Entities: {'meal_type': None, 'food_type': None, 'person_type': None, 'diet_type': None, 'disease_type': None}
Recommended Food: idly,rice, curry and milk


ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))

In [16]:
# Step 1: Load the Datasets (Adjust the paths as per your setup)
df_normal = pd.read_excel(r"C:\Users\VIKRAM\Downloads\NLP DATASET.xlsx")  # Replace with your actual file path
df_diseased = pd.read_excel(r"C:\Users\VIKRAM\Downloads\nlp dieseases dataset.xlsx")  # Replace with your actual file path

# Step 2: Preprocessing Function to clean text
def preprocess_text(text):
    text = re.sub(r'\W+', ' ', text.lower())  # Remove non-alphabetic characters and convert to lowercase
    return text

# Step 3: Combine columns to create 'features' for both datasets
df_normal['features'] = (df_normal['type'] + " " + df_normal['meal type'] + " " + 
                         df_normal['person type'] + " " + df_normal['diet type'])
df_normal['features'] = df_normal['features'].apply(preprocess_text)

df_diseased['features'] = (df_diseased['type'] + " " + df_diseased['meal type'] + 
                           " " + df_diseased['disease type'])
df_diseased['features'] = df_diseased['features'].apply(preprocess_text)

# Step 4: Initialize the BERT-based NER model using Hugging Face's transformers
from transformers import pipeline
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")

# Step 5: Custom rule-based entity extraction logic (for known terms)
def rule_based_entity_extraction(text):
    # Define common meal types, food types, person types, and diet types
    meal_types = ['breakfast', 'lunch', 'dinner', 'snack']
    food_types = ['veg', 'non-veg', 'vegetarian', 'meat', 'chicken', 'fish', 'salmon', 'egg']
    person_types = ['child', 'adult', 'elderly', 'senior', 'athlete', 'pregnant']
    diet_types = ['low-carb', 'high-protein', 'low-fat', 'vegan', 'keto', 'paleo']
    disease_types = ['diabetes', 'hypertension', 'obesity', 'heart disease']

    entities = {
        'meal_type': None,
        'food_type': None,
        'person_type': None,
        'diet_type': None,
        'disease_type': None
    }
    
    # Extract meal type
    for meal in meal_types:
        if meal in text.lower():
            entities['meal_type'] = meal
            break
    
    # Extract food type
    for food in food_types:
        if food in text.lower():
            entities['food_type'] = food
            break
    
    # Extract person type
    for person in person_types:
        if person in text.lower():
            entities['person_type'] = person
            break
    
    # Extract diet type
    for diet in diet_types:
        if diet in text.lower():
            entities['diet_type'] = diet
            break
    
    # Extract disease type
    for disease in disease_types:
        if disease in text.lower():
            entities['disease_type'] = disease
            break
    
    return entities

# Step 6: Extract entities using both rule-based and BERT-based NER
def extract_entities(text):
    # BERT-based NER output
    bert_entities = ner_pipeline(text)
    rule_entities = rule_based_entity_extraction(text)  # Rule-based entity extraction
    
    # Combine both results, with rule-based extraction taking precedence if applicable
    entities = {
        'meal_type': rule_entities['meal_type'],
        'food_type': rule_entities['food_type'],
        'person_type': rule_entities['person_type'],
        'diet_type': rule_entities['diet_type'],
        'disease_type': rule_entities['disease_type'],
    }
    
    return entities

# Step 7: Load pre-trained word embeddings (GloVe in this example)
import gensim.downloader as api
word_vectors = api.load("glove-wiki-gigaword-100")  # Pre-trained GloVe embeddings with 100-dimension vectors

# Step 8: Function to get word embedding of a sentence
def get_sentence_embedding(text, model):
    words = preprocess_text(text).split()
    embeddings = [model[word] for word in words if word in model]
    if embeddings:
        return np.mean(embeddings, axis=0)
    else:
        return np.zeros(model.vector_size)

# Vectorize both normal and diseased datasets using word embeddings
df_normal['embedding'] = df_normal['features'].apply(lambda x: get_sentence_embedding(x, word_vectors))
df_diseased['embedding'] = df_diseased['features'].apply(lambda x: get_sentence_embedding(x, word_vectors))

# Step 9: Function to recommend food based on extracted entities
from sklearn.metrics.pairwise import cosine_similarity
def recommend_food_based_on_entities(entities):
    if entities['disease_type']:
        # For diseased diet recommendation
        user_input = f"{entities['food_type']} {entities['meal_type']} {entities['disease_type']}"
        user_embedding = get_sentence_embedding(user_input, word_vectors)
        similarities = cosine_similarity([user_embedding], np.stack(df_diseased['embedding'].values))
        idx = np.argmax(similarities)
        return df_diseased['recommend'].iloc[idx]
    else:
        # For normal diet recommendation
        user_input = f"{entities['food_type']} {entities['meal_type']} {entities['person_type']} {entities['diet_type']}"
        user_embedding = get_sentence_embedding(user_input, word_vectors)
        similarities = cosine_similarity([user_embedding], np.stack(df_normal['embedding'].values))
        idx = np.argmax(similarities)
        return df_normal['recommend'].iloc[idx]

# Step 10: Function to get user input and recommend food
def get_recommendation():
    # Get user input as a prompt
    prompt = input("Please describe your diet preferences: ")

    # Extract entities from the user's prompt using both rule-based and BERT NER
    extracted_entities = extract_entities(prompt)
    print(f"Extracted Entities: {extracted_entities}")

    # Recommend food based on the extracted entities
    recommendation = recommend_food_based_on_entities(extracted_entities)
    print("Recommended Food:", recommendation)

# Run the system
get_recommendation()


Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Extracted Entities: {'meal_type': 'breakfast', 'food_type': 'veg', 'person_type': None, 'diet_type': None, 'disease_type': None}
Recommended Food: fruits and panner,sugar free foods


In [19]:
import numpy as np
import pandas as pd
import re
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
from sentence_transformers import SentenceTransformer
import gensim.downloader as api
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
import nltk

# Download NLTK resources
nltk.download('stopwords')
nltk.download('wordnet')

# Load the Datasets
df_normal = pd.read_excel(r"C:\Users\VIKRAM\Downloads\NLP DATASET.xlsx")
df_diseased = pd.read_excel(r"C:\Users\VIKRAM\Downloads\nlp dieseases dataset.xlsx")

# Preprocessing Function to clean text
def preprocess_text(text):
    lemmatizer = WordNetLemmatizer()
    stemmer = PorterStemmer()
    stop_words = set(stopwords.words('english'))
    text = re.sub(r'\W+', ' ', text.lower())
    words = text.split()
    words = [stemmer.stem(lemmatizer.lemmatize(word)) for word in words if word not in stop_words]
    return ' '.join(words)

# Combine columns to create 'features' for both datasets
df_normal['features'] = (df_normal['type'] + " " + df_normal['meal type'] + " " + 
                         df_normal['person type'] + " " + df_normal['diet type'])
df_normal['features'] = df_normal['features'].apply(preprocess_text)

df_diseased['features'] = (df_diseased['type'] + " " + df_diseased['meal type'] + 
                           " " + df_diseased['disease type'])
df_diseased['features'] = df_diseased['features'].apply(preprocess_text)

# Fine-tuned NER model setup
model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"
ner_pipeline = pipeline("ner", model=model_name, tokenizer=model_name, aggregation_strategy="simple")

# Improved rule-based entity extraction
def rule_based_entity_extraction(text):
    meal_types = ['breakfast', 'lunch', 'dinner', 'snack']
    food_types = ['veg', 'non-veg', 'vegetarian', 'meat', 'chicken', 'fish', 'salmon', 'egg']
    person_types = ['child', 'adult', 'elderly', 'senior', 'athlete', 'pregnant','normal']
    diet_types = ['low-carb', 'high-protein', 'low-fat', 'vegan', 'keto', 'paleo']
    disease_types = ['diabetes', 'hypertension', 'obesity', 'heart disease']

    entities = {
        'meal_type': None,
        'food_type': None,
        'person_type': None,
        'diet_type': None,
        'disease_type': None
    }
    
    for meal in meal_types:
        if meal in text.lower():
            entities['meal_type'] = meal
            break
    
    for food in food_types:
        if food in text.lower():
            entities['food_type'] = food
            break
    
    for person in person_types:
        if person in text.lower():
            entities['person_type'] = person
            break
    
    for diet in diet_types:
        if diet in text.lower():
            entities['diet_type'] = diet
            break
    
    for disease in disease_types:
        if disease in text.lower():
            entities['disease_type'] = disease
            break
    
    return entities

# Extract entities using both rule-based and fine-tuned BERT-based NER
def extract_entities(text):
    bert_entities = ner_pipeline(text)
    rule_entities = rule_based_entity_extraction(text)
    
    entities = {
        'meal_type': rule_entities['meal_type'],
        'food_type': rule_entities['food_type'],
        'person_type': rule_entities['person_type'],
        'diet_type': rule_entities['diet_type'],
        'disease_type': rule_entities['disease_type'],
    }
    
    return entities

# Load a better pre-trained sentence transformer model
sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Function to get sentence embedding
def get_sentence_embedding(text, model):
    return model.encode(text)

# Vectorize both normal and diseased datasets using sentence embeddings
df_normal['embedding'] = df_normal['features'].apply(lambda x: get_sentence_embedding(x, sentence_model))
df_diseased['embedding'] = df_diseased['features'].apply(lambda x: get_sentence_embedding(x, sentence_model))

# Function to recommend food based on extracted entities
def recommend_food_based_on_entities(entities):
    if entities['disease_type']:
        user_input = f"{entities['food_type']} {entities['meal_type']} {entities['disease_type']}"
        user_embedding = get_sentence_embedding(user_input, sentence_model)
        similarities = cosine_similarity([user_embedding], np.stack(df_diseased['embedding'].values))
        idx = np.argmax(similarities)
        return df_diseased['recommend'].iloc[idx]
    else:
        user_input = f"{entities['food_type']} {entities['meal_type']} {entities['person_type']} {entities['diet_type']}"
        user_embedding = get_sentence_embedding(user_input, sentence_model)
        similarities = cosine_similarity([user_embedding], np.stack(df_normal['embedding'].values))
        idx = np.argmax(similarities)
        return df_normal['recommend'].iloc[idx]

# Function to get user input and recommend food
def get_recommendation():
    prompt = input("Please describe your diet preferences: ")
    extracted_entities = extract_entities(prompt)
    print(f"Extracted Entities: {extracted_entities}")
    recommendation = recommend_food_based_on_entities(extracted_entities)
    print("Recommended Food:", recommendation)

# Run the system
app = FastAPI()
@app.post("/rec")
def get_rec(prompt:str):
    extracted_entities = extract_entities(prompt)
    print(f"Extracted Entities: {extracted_entities}")
    recommendation = recommend_food_based_on_entities(extracted_entities)
    print("Recommended Food:", recommendation)




[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\VIKRAM\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\VIKRAM\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification

Extracted Entities: {'meal_type': 'breakfast', 'food_type': 'veg', 'person_type': 'normal', 'diet_type': None, 'disease_type': None}
Recommended Food: idly or dosa or any type of breakfast


I am an elderly person with diabetes and prefer veg food for breakfast.


In [21]:
import requests

# Set your Edamam API credentials
EDAMAM_APP_ID = '2a98c6a4'  # Replace with your Edamam APP ID
EDAMAM_APP_KEY = '75704984fc122dc3153ae7a943f3cb56'  # Replace with your Edamam APP KEY

# Function to get nutritional information from Edamam API
def get_nutritional_info(food_item):
    url = f"https://api.edamam.com/api/nutrition-data?app_id={EDAMAM_APP_ID}&app_key={EDAMAM_APP_KEY}&nutrition-type=logging&ingr={food_item}"
    
    response = requests.get(url)
    
    if response.status_code == 200:
        return response.json()
    else:
        return None

# Function to format nutritional information for output
def format_nutrition_info(nutrition_data):
    if nutrition_data:
        nutrients = nutrition_data.get('totalNutrients', {})
        formatted_info = []
        for nutrient, details in nutrients.items():
            formatted_info.append(f"{details['label']}: {details['quantity']} {details['unit']}")
        return "\n".join(formatted_info)
    return "Nutritional information not available."

# Function to get user input and perform nutritional analysis
def get_nutritional_analysis():
    # Get user input for food item
    food_item = input("Enter the food item you want nutritional information for: ")

    # Get nutritional analysis for the food item
    nutrition = get_nutritional_info(food_item)
    nutritional_analysis = format_nutrition_info(nutrition)
    print("Nutritional Analysis:\n", nutritional_analysis)

# Run the system
get_nutritional_analysis()


Nutritional Analysis:
 Energy: 524.37 kcal
Total lipid (fat): 22.407 g
Fatty acids, total saturated: 4.897200000000001 g
Fatty acids, total monounsaturated: 5.7057 g
Fatty acids, total polyunsaturated: 10.2795 g
Carbohydrate, by difference: 65.373 g
Carbohydrates (net): 65.373 g
Protein: 14.784 g
Cholesterol: 136.29 mg
Sodium, Na: 1014.09 mg
Calcium, Ca: 505.89 mg
Magnesium, Mg: 36.96 mg
Potassium, K: 304.92 mg
Iron, Fe: 4.158 mg
Zinc, Zn: 1.2936 mg
Phosphorus, P: 367.29 mg
Vitamin A, RAE: 124.74000000000001 µg
Vitamin C, total ascorbic acid: 0.693 mg
Thiamin: 0.46431000000000006 mg
Riboflavin: 0.6491100000000001 mg
Niacin: 3.6267 mg
Vitamin B-6: 0.10626000000000001 mg
Folate, DFE: 129.36 µg
Folate, food: 27.72 µg
Folic acid: 60.06 µg
Vitamin B-12: 0.5082 µg
Water: 122.199 g
