## *Importing required libraries*

In [2]:
import pandas as pd
import gensim
from tensorflow import keras
from tensorflow.keras.utils import to_categorical

## *Merged 2 excel sheets datasets, in total we have used 329 asanas benefits*

In [3]:
df = pd.read_csv('test.csv')

In [15]:
df['Benefits'][0]

'This asana strengthens\nthe abdominal muscles and massages the organs. It strengthens the digestive system, lower back, pelvic and perineal muscles and helps correct prolapse.'

In [16]:
df.head()

Unnamed: 0,Asana,Benefits
0,PADOTTHANASANA,This asana strengthens\nthe abdominal muscles ...
1,PARVATASANA,This pose strengthens the nerves and muscles i...
2,ARDHA TITALI ASANA,This is an excellent \npreparatory practice fo...
3,GATYATMAK MERU \nVAKRASANA,This asana removes stiffness \nof the back and...
4,SIDEWAYS VIEWING,Sideways viewing relaxes the \ntension of the ...


In [4]:
#shape of the data frame
df.shape

(326, 2)

## *Removing Stop words at first and converting it into lower cases so that all stop words can be remove which were in Upper cases.*


In [18]:
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
stop = stopwords.words('english')
#  1st step for removing stop words is to use the library and remove stop words, this will make 
# sure that some basic stop words and numerical values is been removed from the Benefits column.

# 2nd step is that there might be stop words present in upper case for example "This" is a stop words 
# which was not removed in 1st step so I converted the Benefits column to lower case. Moreover we can only  
# lowercase any sentence or entire column when that column is free of any numerical value.

# 3rd then further removing stopwords, in this way we get the whole benefits column free from stop words


from gensim.parsing.preprocessing import remove_stopwords
from gensim.parsing.preprocessing import strip_non_alphanum
from gensim.parsing.preprocessing import strip_numeric
from gensim.parsing.preprocessing import strip_multiple_whitespaces
df['Benefits'] = df['Benefits'].apply(str.lower)
df['Benefits']=df.Benefits.apply(remove_stopwords)
df['Benefits']=df.Benefits.apply(strip_non_alphanum)
df['Benefits']=df.Benefits.apply(strip_numeric)
df['Benefits']=df.Benefits.apply(strip_multiple_whitespaces)
df['Asana']=df.Asana.apply(strip_multiple_whitespaces)
df['Asana'] = df['Asana'].apply(str.lower)
df.head()

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\raist\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,Asana,Benefits
0,padotthanasana,asana strengthens abdominal muscles massages o...
1,parvatasana,pose strengthens nerves muscles limbs back hel...
2,ardha titali asana,excellent preparatory practice loosening knee ...
3,gatyatmak meru vakrasana,asana removes stiffness increases flexibility ...
4,sideways viewing,sideways viewing relaxes tension muscles strai...


## *We have further removed special characters and tokenized each row of benefits.* 

In [19]:
# this will create the list of each benefit rows
benefits =df['Benefits'].apply(gensim.utils.simple_preprocess)
print(benefits)

0      [asana, strengthens, abdominal, muscles, massa...
1      [pose, strengthens, nerves, muscles, limbs, ba...
2      [excellent, preparatory, practice, loosening, ...
3      [asana, removes, stiffness, increases, flexibi...
4      [sideways, viewing, relaxes, tension, muscles,...
                             ...                        
321    [helps, stretching, legs, hamstrings, arms, ch...
322    [relieves, pain, hands, feet, helpful, rheumat...
323    [asana, provides, deep, muscular, massage, abd...
324    [bhunamanasana, stretches, improves, flexibili...
325    [stretches, strengthens, lengthens, mandalasan...
Name: Benefits, Length: 326, dtype: object


In [24]:
# prints the words present in the benefit row 1
benefits[0]

['asana',
 'strengthens',
 'abdominal',
 'muscles',
 'massages',
 'organs',
 'strengthens',
 'digestive',
 'system',
 'lower',
 'back',
 'pelvic',
 'perineal',
 'muscles',
 'helps',
 'correct',
 'prolapse']

## *Implemented Word2Vec with following features:*
* window size = 5
* minimum word cound for which the model can input row wise data = 2
* required cpu  threads to train the model = 4
* size of the required vector embedding = 50

In [27]:

model = gensim.models.Word2Vec(
    window=5,
    min_count=2,
    workers=4,
    vector_size = 50,
)

## *Building vocabulary of unique words present in the entire benefit column*

In [28]:

model.build_vocab(benefits, progress_per=5)
# vocab_len = len(model.wv)
# print(vocab_len)

In [29]:
model.train(benefits, total_examples=model.corpus_count, epochs=2000)

(22224504, 29294000)

## *We can test our model for any words suppose say sciatica we will get the similar words in benefits* 

In [30]:
#when we find the similar words for a disease in 
#Benefits section we also see that we get the names of asanas also.
#like for this word we get similar words like chakrasana, marjariasana, shalabhasana which indicates
#that since we have taken these words from benefits section, it means that for this particular diseaes
#most prpbable asanas can be chakrasana, marjariasana, shalabhasana etc.  to cure it.
model.wv.most_similar("sciatica", topn= 100)

#print(type(model.wv.most_similar("sciatica", topn= 100)))

[('mild', 0.45425891876220703),
 ('injured', 0.42480945587158203),
 ('marjariasana', 0.38857904076576233),
 ('treatment', 0.38853558897972107),
 ('disc', 0.383423388004303),
 ('backache', 0.3821077048778534),
 ('prevents', 0.3601524233818054),
 ('certain', 0.3565708100795746),
 ('problem', 0.3547630310058594),
 ('slipped', 0.3526769280433655),
 ('rid', 0.34552139043807983),
 ('flexibility', 0.33839884400367737),
 ('need', 0.33816424012184143),
 ('arms', 0.33709394931793213),
 ('stiff', 0.33378714323043823),
 ('aches', 0.32953953742980957),
 ('therapeutically', 0.32749485969543457),
 ('benefit', 0.324897825717926),
 ('spondylitis', 0.3222949802875519),
 ('time', 0.3129269480705261),
 ('maintaining', 0.31038257479667664),
 ('carotid', 0.3046204149723053),
 ('rounded', 0.30206137895584106),
 ('relaxes', 0.2995331585407257),
 ('corrects', 0.2925383150577545),
 ('inside', 0.29080629348754883),
 ('lumbago', 0.29048052430152893),
 ('sitting', 0.28524529933929443),
 ('alternately', 0.283051490

## *Printing one of the benefits column word's vector*

In [31]:
# suppose we want to know the vector embedding of a word 'pain' so we have printed the vector embedding of size 50
import numpy as np
print((model.wv.get_vector('pain')))

[ 1.1000563   0.32226804 -1.9302248   6.0603      4.416438    4.7391334
  2.7212057  -3.3992515   7.587476    0.17038737 -6.465132   -0.5932783
 -4.501964    3.7867205  -7.656277    6.808827    7.1899824   0.13583831
  5.449237   -6.717407    3.3181589  -3.2042418   2.0554526   9.354648
 -5.7892213  -2.8275573   3.4504647  -2.5591378  -7.5924187  -0.9921577
 -1.8286586   5.198996   -0.01902388  4.9891095   6.4184422  12.01973
  2.7938776   4.780818    6.6547403   5.913949    0.1576084  -1.7060562
  4.947958   -1.4263921  -1.6679434   2.6411188  -1.5154663   5.078552
 -1.5190635   1.2607638 ]


In [32]:
# counts total number of rows in datasets being trained
model.corpus_count

326

In [33]:
# from gensim.models import Word2Vec
# # created list of unique words from the column Benefits. 
# words = list(w for w in model.wv.vocab)

In [34]:
# List of unique words in the vocabulary
words = list(model.wv.index_to_key)

# Print the first few words
print(words)  # Prints the first 10 words in the vocabulary


['muscles', 'body', 'pose', 'helps', 'organs', 'asana', 'spine', 'improves', 'abdominal', 'hips', 'shoulders', 'strengthens', 'blood', 'practice', 'lower', 'balance', 'stretches', 'stretch', 'tones', 'chest', 'legs', 'flexibility', 'neck', 'good', 'arms', 'posture', 'abdomen', 'nervous', 'mind', 'back', 'flow', 'yoga', 'circulation', 'reproductive', 'pelvic', 'system', 'leg', 'hip', 'entire', 'stimulates', 'awareness', 'nerves', 'spinal', 'core', 'improving', 'deep', 'digestive', 'heart', 'gives', 'digestion', 'joints', 'pressure', 'benefits', 'chakra', 'increases', 'strength', 'concentration', 'great', 'upper', 'energy', 'constipation', 'sense', 'thighs', 'functioning', 'like', 'related', 'toning', 'region', 'glands', 'stretching', 'knees', 'breathing', 'toned', 'especially', 'poses', 'focus', 'hamstrings', 'breath', 'strong', 'massages', 'area', 'liver', 'ankles', 'internal', 'better', 'help', 'improve', 'pain', 'the', 'alignment', 'brain', 'disorders', 'stability', 'flexible', 'leve

In [35]:
# here we have the length of unique words
print(len(words))

1330


## *Created an empty dictionary at first to store the unique words as key words  along with its vector embeddings.*

In [36]:
dict_of_word_embeddings = dict({})
for i in words:
    dict_of_word_embeddings[i] = model.wv[i]

In [37]:
# on printing the dictionary, we get the following result.
print(dict_of_word_embeddings)

{'muscles': array([ 0.46634394, -0.91475654,  2.5783145 ,  0.12493411, -1.1356903 ,
       -3.7141616 , -0.8198054 , -0.9100741 , -0.56567585,  2.1634228 ,
       -1.7862529 , -1.1338631 ,  0.2927218 , -1.9187555 , -0.9742689 ,
       -0.71258444,  5.1171393 ,  0.6490111 , -3.406924  ,  1.725708  ,
        0.23306397, -0.7682961 , -2.7175717 , -2.1356087 ,  1.8023326 ,
        0.6797846 , -1.4303865 , -3.3858416 , -3.3897648 ,  0.46971256,
       -0.4867956 ,  0.7648104 , -1.0619305 ,  1.7708702 , -0.13873132,
        0.5967032 , -1.6750422 ,  1.6154981 ,  2.6825273 , -0.29144588,
       -0.20803748,  0.15212946,  0.82340693, -3.9111683 , -0.15389054,
       -0.46844319,  0.7721373 ,  0.6583611 , -1.7425859 ,  1.1390502 ],
      dtype=float32), 'body': array([ 1.37713   ,  1.9714247 ,  1.2513533 ,  0.5463303 , -0.07001559,
       -0.24291956,  1.138048  , -1.5737425 ,  0.60283625,  0.4253349 ,
       -1.320423  ,  0.36260587,  2.7715    ,  0.6321337 , -3.9918473 ,
        0.8097463 ,  

## *Exporting dictionary data into separate excel sheet/ csv file*.

In [38]:
Unique_words = dict_of_word_embeddings.keys()
word_vectors  = dict_of_word_embeddings.values()
# print(asanas, word_vectors)
d = {'Unique_words' : Unique_words , 'Word_Vectors' : word_vectors}
dataframe = pd.DataFrame(data = d)
dataframe

Unnamed: 0,Unique_words,Word_Vectors
0,muscles,"[0.46634394, -0.91475654, 2.5783145, 0.1249341..."
1,body,"[1.37713, 1.9714247, 1.2513533, 0.5463303, -0...."
2,pose,"[-1.010646, -1.5965734, 1.8982941, -0.3611227,..."
3,helps,"[-0.7490367, -0.67299503, -0.07158308, -0.2604..."
4,organs,"[1.7304231, -0.6442864, -0.4366188, -2.9156811..."
...,...,...
1325,little,"[0.4547904, 4.093418, -2.021598, 2.591361, 2.6..."
1326,trikonasana,"[2.9900422, 0.43878216, 3.7531257, 4.523257, 3..."
1327,migraine,"[-4.319514, 2.2407453, 2.1898117, 5.6057577, 6..."
1328,heals,"[-2.1189978, 0.21470195, 3.006428, 1.6113621, ..."


## *We have made list of unique asanas as after merging excel sheets there were repeated asanas*

In [39]:
asanas = list(df['Asana'])
# total asanas present ( with repetition)
print(len(asanas))
asana = []
      

for x in asanas:
  if x not in asana:
    asana.append(x)
# total number of unique asanas
print(len(asana))    
# list of unique asanas
print(asana)

326
293
['padotthanasana', 'parvatasana', 'ardha titali asana', 'gatyatmak meru vakrasana', 'sideways viewing', 'makarasana', 'padmasana', 'vajrasana', 'ardha chandrasana', 'yogamudrasana', 'bhujangasana', 'saithalyasana', 'bhu namanasana', 'sarvangasana', 'natarajasana', 'poorna bhujangasana', 'koormasana', 'poorna shalabhasana', 'poorna dhanurasana', 'bandha hasta utthanasana ', 'shava udarakarshanasana ', 'chakki chalanasana ', 'kashtha takshanasana ', 'vayu nishkasana', 'ushtrasana', 'samakonasana ', 'matsyasana', 'kandharasana', ' setu asana ', 'paschimottanasana', 'meru akarshanasana', 'pada hastasana', 'seetkari pranayama', 'jalandhara bandha', 'tadagi mudra', 'maha vedha mudra', 'shashankasana', 'janu chakra', 'poorna titali asana', 'manibandha chakra', 'skandha chakra', 'greeva sanchalana', 'padachakrasana', 'pada sanchalanasana', 'supta pawanmuktasana', 'jhulana lurhakanasana', 'supta udarakarshanasana', 'naukasana', 'rajju karshanasana', 'nauka sanchalanasana', 'namaskarasan

## *On-hot Encodded the asanas words and created a dictionary to store the asana word with its word embedding*

In [40]:
# from tensorflow import keras
# from tensorflow.keras.utils import to_categorical

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(asana)
#print(integer_encoded)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1) #Converts the integer-encoded labels into a 2D array required by the OneHotEncoder.


### One hot encoding: Converts the integer labels into one-hot encoded vectors.
onehot_encoder = OneHotEncoder(sparse=False)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded)

# onehot_encoded

#Purpose: Creates a dictionary where:
# Key: The asana name.
# Value: The corresponding one-hot encoded vector.

asan_dict={}

for i in range(len(asana)):
  asan_dict[asana[i]] = onehot_encoded[i]

print(asan_dict) 
# asan_dict['padotthanasana'].size




{'padotthanasana': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
      

In [41]:
from tensorflow import keras
from tensorflow.keras.layers import Dense

In [42]:
# printing the benefit words of one row.
print(benefits[0])

['asana', 'strengthens', 'abdominal', 'muscles', 'massages', 'organs', 'strengthens', 'digestive', 'system', 'lower', 'back', 'pelvic', 'perineal', 'muscles', 'helps', 'correct', 'prolapse']


## *Created a Tupple associating the benefits words of each row with the respective asana. In order to bring the associativity between benefits words and asana words*

In [47]:
pair=[]

i=0
a=len(asana)
for x in benefits:
  if(i<a):
    target=asana[i]
    for y in x:
      if(y not in words):
        continue
      pair.append((y,target))
  i+=1  
print(pair)




[('asana', 'padotthanasana'), ('strengthens', 'padotthanasana'), ('abdominal', 'padotthanasana'), ('muscles', 'padotthanasana'), ('massages', 'padotthanasana'), ('organs', 'padotthanasana'), ('strengthens', 'padotthanasana'), ('digestive', 'padotthanasana'), ('system', 'padotthanasana'), ('lower', 'padotthanasana'), ('back', 'padotthanasana'), ('pelvic', 'padotthanasana'), ('muscles', 'padotthanasana'), ('helps', 'padotthanasana'), ('correct', 'padotthanasana'), ('prolapse', 'padotthanasana'), ('pose', 'parvatasana'), ('strengthens', 'parvatasana'), ('nerves', 'parvatasana'), ('muscles', 'parvatasana'), ('limbs', 'parvatasana'), ('back', 'parvatasana'), ('helps', 'parvatasana'), ('increase', 'parvatasana'), ('height', 'parvatasana'), ('stretching', 'parvatasana'), ('muscles', 'parvatasana'), ('ligaments', 'parvatasana'), ('enabling', 'parvatasana'), ('growing', 'parvatasana'), ('bones', 'parvatasana'), ('grow', 'parvatasana'), ('longer', 'parvatasana'), ('circulation', 'parvatasana'), 

## *Making a 2-D array of context words(benefit words) and Target words(asana words) by numpy stack*

In [48]:
contexts=[dict_of_word_embeddings[context] for context,target in pair]
contexts=np.vstack(contexts)
# shape of the context words matrix
contexts.shape # all vector embedding vectors are stacked vertically into a 2D numpy array.


(12085, 50)

In [49]:
contexts

array([[ 3.3084905 , -1.5114819 ,  1.9002358 , ...,  1.493281  ,
        -0.11693577, -1.9406389 ],
       [ 1.9807843 , -0.870829  ,  3.0639663 , ...,  0.48360732,
        -1.429632  , -0.4359957 ],
       [ 2.9201853 , -3.3404377 , -2.934537  , ..., -0.255731  ,
        -3.7639718 ,  0.47409016],
       ...,
       [-0.7490367 , -0.67299503, -0.07158308, ..., -0.20611694,
         0.13005823, -0.12480806],
       [-0.8133205 , -2.6125422 ,  0.0862323 , ...,  1.2918379 ,
        -4.537013  ,  2.275304  ],
       [ 1.37713   ,  1.9714247 ,  1.2513533 , ..., -1.6213106 ,
        -2.3937314 ,  1.2456352 ]], dtype=float32)

In [50]:
targets=[asan_dict[target] for context,target in pair]
targets=np.vstack(targets)
# shape of the target words matrix

targets

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

## *Implemented **Artificial Neural Network**. with the help of tensorfow's keras functional API with the following features:*

* Number of input layer = 1
* Size of input layer = 50 unit
* Number of dense layer = 1
* size of dense layer = 1000 units
* Number of output layer = 1
* size of output unit = 293
* activation function = softmax
* loss = categorical_crossentropy
* optimizer = adam
* number of epotchs = 100

In [51]:
network_input = keras.Input(shape=contexts.shape[1], name='input_layer')
                                                                                    # Create a hidden layer for the network; store under 'hidden_layer'
hidden_layer1 = Dense(units=1000, activation='sigmoid', name='hidden_layer1')(network_input)

                                                                                            # Create an output layer for the network; store under 'output_layer'
output_layer = Dense(units=targets.shape[1], activation='softmax', name='output_layer')(hidden_layer1)

                                                                          # Create a Keras Model; store under 'embedding_model'
embedding_model = keras.Model(inputs=network_input, outputs=output_layer)

                                                          # Compile the model for training; define loss function
embedding_model.compile(loss='categorical_crossentropy',optimizer = 'adam', metrics = ['accuracy'])

                                                          # Print out a summary of the model
embedding_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    [(None, 50)]              0         
                                                                 
 hidden_layer1 (Dense)       (None, 1000)              51000     
                                                                 
 output_layer (Dense)        (None, 293)               293293    
                                                                 
Total params: 344293 (1.31 MB)
Trainable params: 344293 (1.31 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [52]:
# Fit a model to the data
embedding_model.fit(x=contexts,   # inputs
                    y=targets,   # outputs
                    batch_size=1024,  # how many pairs of words processed simultaneously
                    epochs=100,   # how many times we loop through the whole data
                    verbose=1   # do not print training status
                   )



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.src.callbacks.History at 0x1a57fb8c690>

## *Function which inputs the user details and suggests user with the most recommended asanas*

In [None]:
from collections import Counter
from IPython.display import clear_output

def magic():
  predicted_asanas = []
  user_input_words= []
  final_predicted_asanas = []
  number_in_words = ['first','second', 'third', 'fourth']
  for i in range(4):
    user_input_words.append(input(f"Enter {number_in_words[i]} benefit word:  "))
  for i in user_input_words:
    if i in dict_of_word_embeddings:

      input_array = np.expand_dims(dict_of_word_embeddings[i], axis=0)
      prediction = embedding_model.predict(input_array)
      print(prediction)
      flatten_pred = prediction.flatten()
      result_indices = flatten_pred.argsort()[-10:][::-1]
    
      for result in result_indices:
        predicted_asanas.append(asana[result])
    
    
  counter_found = Counter(predicted_asanas)
  final_predicted_asanas_with_freq = counter_found.most_common(7)
  # print(final_predicted_asanas_with_freq)

  for yoga, freq in final_predicted_asanas_with_freq:
    final_predicted_asanas.append(yoga)
  
  print(final_predicted_asanas)
  choice=input("Clear output: Y/N ")
  if choice=='Y':
   clear_output()
  
  
magic()

Enter first benefit word:  pain
Enter second benefit word:  back
Enter third benefit word:  head
Enter fourth benefit word:  leg
[[4.19180242e-06 1.05230504e-06 5.42575202e-04 1.06173127e-06
  2.76531555e-05 4.16982948e-04 6.07841400e-07 1.15782814e-06
  2.27867513e-05 1.07193785e-06 1.86161913e-07 5.47117030e-04
  7.61873707e-06 6.51376831e-06 7.08886500e-06 2.85810847e-06
  5.37746064e-06 2.14024863e-06 1.27548346e-05 5.18586487e-04
  4.14305367e-04 2.46618242e-06 4.09504100e-05 5.56348176e-08
  6.05479181e-05 4.26048442e-04 5.40141154e-05 7.86874523e-07
  2.59666886e-05 8.73656177e-07 1.34019647e-03 1.49632542e-04
  7.82566474e-08 1.07736605e-05 1.39321372e-08 3.10482246e-05
  1.18544460e-06 1.95887333e-06 2.83173699e-06 1.86385405e-05
  1.52082421e-05 1.44896738e-04 1.01094031e-06 3.83578939e-04
  1.61411754e-05 6.20200444e-05 1.47029186e-05 1.45192671e-05
  5.96666223e-06 3.17569450e-02 5.56237101e-05 1.91489886e-02
  3.06238973e-04 3.38815502e-04 1.17529213e-04 1.28331798e-04
  1

[[1.51812358e-07 3.56261836e-07 9.65991057e-06 1.95196009e-07
  1.34884237e-06 9.87562362e-06 1.04536572e-04 5.60112028e-07
  2.58420896e-05 2.89096584e-04 2.05726269e-06 1.36203889e-04
  2.56941144e-06 1.88213028e-06 1.10112822e-04 2.14625878e-04
  3.33652170e-05 1.09533161e-07 1.51546431e-06 1.12606533e-04
  2.31160506e-04 1.35858618e-05 1.78573316e-06 8.50499418e-06
  6.25813811e-07 1.43250494e-04 3.98926602e-07 3.04902642e-06
  4.66034976e-07 2.34815525e-03 4.51206695e-04 3.43191475e-02
  4.34402546e-06 3.92564168e-08 1.31670328e-08 2.32260709e-06
  4.17513866e-06 1.97201246e-07 1.42159946e-08 1.16557466e-08
  7.64444267e-05 2.00665618e-05 1.13895176e-05 2.09732156e-04
  1.89709841e-04 3.15424245e-06 2.15410819e-06 3.32482898e-07
  2.38040684e-06 3.38269980e-04 3.07808332e-05 3.72849854e-06
  1.94888475e-04 1.82287200e-04 5.58544621e-02 2.08175345e-03
  3.18152306e-04 3.60449553e-02 1.63080567e-05 1.31575252e-05
  1.15504372e-06 4.60714509e-04 1.09588555e-05 7.84071817e-05
  7.9960

In [50]:
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\raist\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [62]:
# Define the updated magic function
import tensorflow as tf
from tensorflow import keras
import pickle

# Load the pre-trained model
embedding_model = keras.models.load_model('embedding_model.h5')

# Load the word embeddings dictionary
with open('word_embeddings.pkl', 'rb') as file:
    dict_of_word_embeddings = pickle.load(file)


def magic():
    predicted_asanas = []
    final_predicted_asanas = []
    
    # Get user input as a sentence
    user_input = input("What is your problem? Describe it in one sentence: ")
    
    # Preprocess the input: tokenize, remove stop words, and clean
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(re.sub(r'\W+', ' ', user_input.lower()))
    filtered_words = [word for word in tokens if word not in stop_words]
    
    print(f"Processed words from your input: {filtered_words}")
    
    # Predict asanas for each word
    for word in filtered_words:
        if word in dict_of_word_embeddings:
            input_array = np.expand_dims(dict_of_word_embeddings[word], axis=0)
            prediction = embedding_model.predict(input_array)
            flatten_pred = prediction.flatten()
            result_indices = flatten_pred.argsort()[-10:][::-1]

            for result in result_indices:
                predicted_asanas.append(asana[result])
    
    # Count frequency of predicted asanas
    counter_found = Counter(predicted_asanas)
    final_predicted_asanas_with_freq = counter_found.most_common(7)
    # Extract the top 7 asanas
    for yoga, freq in final_predicted_asanas_with_freq:
        final_predicted_asanas.append(yoga)
    
    # Display results
    print("Recommended yoga asanas for your problem:")
    print(final_predicted_asanas)
    
    # Option to clear output
#     choice = input("Clear output: Y/N ")
#     if choice.upper() == 'Y':
#         clear_output()

# Call the function
magic()


What is your problem? Describe it in one sentence: jyfyujf yjf,jf,ykfyj,fyj fyyufyufyuf ylfyulfyulf yul fyifyif ylifylifyi fyli fyif yli fyi f  pain , head, leg back
Processed words from your input: ['jyfyujf', 'yjf', 'jf', 'ykfyj', 'fyj', 'fyyufyufyuf', 'ylfyulfyulf', 'yul', 'fyifyif', 'ylifylifyi', 'fyli', 'fyif', 'yli', 'fyi', 'f', 'pain', 'head', 'leg', 'back']
Recommended yoga asanas for your problem:
[' eka padasana', 'parivritti janu sirshasana', 'salabhasana', 'mandalasana', 'trataka', 'marjariasan', 'mandukasana']


In [55]:
# Save the trained model
embedding_model.save('embedding_model.h5')


  saving_api.save_model(


In [56]:
import pickle

# Save the word embeddings dictionary
with open('word_embeddings.pkl', 'wb') as file:
    pickle.dump(dict_of_word_embeddings, file)


In [57]:
import tensorflow as tf
from tensorflow import keras
import pickle

# Load the pre-trained model
embedding_model = keras.models.load_model('embedding_model.h5')

# Load the word embeddings dictionary
with open('word_embeddings.pkl', 'rb') as file:
    dict_of_word_embeddings = pickle.load(file)


In [59]:
# Test loading the model
print(embedding_model.summary())

# Test loading the word embeddings dictionary
print(len(dict_of_word_embeddings))  # Number of words in the dictionary
print(dict_of_word_embeddings['pain'])  # Example word embedding


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    [(None, 50)]              0         
                                                                 
 hidden_layer1 (Dense)       (None, 1000)              51000     
                                                                 
 output_layer (Dense)        (None, 293)               293293    
                                                                 
Total params: 344293 (1.31 MB)
Trainable params: 344293 (1.31 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
1330
[ 0.6291259  -3.3845065   2.2620244   2.0459297   4.8857775   2.2315004
  7.5501094  -0.70111424 12.861071    2.4441192  -6.415813    0.8094724
 -1.0558401   3.7067482  -7.6622095   6.9599185  -1.0589273  -1.6303406
 -2.1498697  -2.3979862   0.95143825  2.0239458   2.9360132   9

In [63]:
with open('asana_list.pkl', 'wb') as file:
    pickle.dump(asanas, file)

print("Asana list saved successfully.")

Asana list saved successfully.


In [64]:
import pickle

# Load the list from the file
with open('asana_list.pkl', 'rb') as file:
    asana = pickle.load(file)

print("Asana list retrieved successfully:")
print(asana)

Asana list retrieved successfully:
['padotthanasana', 'parvatasana', 'ardha titali asana', 'gatyatmak meru vakrasana', 'sideways viewing', 'makarasana', 'padmasana', 'vajrasana', 'ardha chandrasana', 'yogamudrasana', 'bhujangasana', 'saithalyasana', 'bhu namanasana', 'sarvangasana', 'natarajasana', 'poorna bhujangasana', 'koormasana', 'poorna shalabhasana', 'poorna dhanurasana', 'bandha hasta utthanasana ', 'shava udarakarshanasana ', 'chakki chalanasana ', 'kashtha takshanasana ', 'vayu nishkasana', 'ushtrasana', 'samakonasana ', 'matsyasana', 'kandharasana', ' setu asana ', 'paschimottanasana', 'meru akarshanasana', 'pada hastasana', 'seetkari pranayama', 'jalandhara bandha', 'tadagi mudra', 'maha vedha mudra', 'shashankasana', 'janu chakra', 'poorna titali asana', 'manibandha chakra', 'skandha chakra', 'greeva sanchalana', 'padachakrasana', 'pada sanchalanasana', 'supta pawanmuktasana', 'jhulana lurhakanasana', 'supta udarakarshanasana', 'naukasana', 'rajju karshanasana', 'gatyatmak