In [None]:
# import required libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity
import re
import nltk
nltk.download('punkt')
import tensorflow_hub as hub
nltk.download('wordnet')
nltk.download('stopwords')
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))

from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Normalize raw text into a space-joined string of lemmatized tokens.

    The text is lowercased, every character that is neither a word character
    nor whitespace is removed, and digit runs are removed.  The result is
    tokenized with ``nltk.word_tokenize``; tokens present in ``stop_words``
    are dropped and the remaining ones are lemmatized with ``lemmatizer``.
    """
    without_punct = re.sub(r'[^\w\s]', '', text.lower())
    cleaned = re.sub(r'\d+', '', without_punct)

    kept = []
    for word in nltk.word_tokenize(cleaned):
        # The stopword check is applied to the token before lemmatization.
        if word in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(word))
    return ' '.join(kept)


# load the dataset
data = pd.read_csv('dataset.csv')

# Embed every preprocessed "question + student answer" pair with the
# Universal Sentence Encoder; each row of X is one flattened embedding.
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
X = np.array([embed([preprocess_text(q + ' ' + a)]).numpy().flatten() for q, a in zip(data['question'], data['student_answer'])])
y = data['score_avg']

# Hold out 20% of the rows for testing.  The split is seeded (like the
# regressor below) so the reported metrics are reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# create the model and fit it to the training data
model = MLPRegressor(hidden_layer_sizes=(100,50), activation='relu', solver='adam', max_iter=500, random_state=42)
model.fit(X_train, y_train)

# evaluate the model on the testing set
y_pred = model.predict(X_test)

# compute evaluation metrics
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
# RMSE is derived from the MSE rather than via `squared=False`, which was
# deprecated in scikit-learn 1.4 and is not accepted by later releases.
rmse = np.sqrt(mse)

# print evaluation metrics
print('R-squared:', r2)
print('Mean absolute error:', mae)
print('Mean squared error:', mse)
print('Root mean squared error:', rmse)

# deploy the model to production
def predict_relevance(question, desired_answer, student_answer):
    """Score a student answer on a 0-5 scale.

    The regression model predicts a base score from the Universal Sentence
    Encoder embedding of the preprocessed question and student answer.  That
    score is then shifted according to the cosine similarity between the
    embeddings of the desired answer and the student answer, and finally
    clamped to the range [0, 5].

    The raw model score and the cosine similarity are printed as diagnostics.

    Args:
        question: Text of the question.
        desired_answer: Reference answer to compare against.
        student_answer: Answer to be graded.

    Returns:
        float: The adjusted score, between 0.0 and 5.0 inclusive.
    """
    # preprocess the text
    question = preprocess_text(question)
    desired_answer = preprocess_text(desired_answer)
    student_answer = preprocess_text(student_answer)

    # encode the concatenated question and student answer, mirroring how the
    # training features were built
    input_text = question + ' ' + student_answer
    input_emb = embed([input_text]).numpy().flatten()

    # predict the relevance score of the input text
    raw_score = model.predict([input_emb])[0]
    print(raw_score)

    # compute cosine similarity between desired_answer and student_answer texts
    similarity = cosine_similarity(embed([desired_answer]).numpy(), embed([student_answer]).numpy())[0][0]
    print("Cosine ", similarity)

    # Work on a plain Python float so the function returns the same type
    # whether or not the clamp below takes effect.
    score = float(raw_score)

    # adjust score based on similarity
    if similarity < 0.4:
        score -= 1.0
    elif similarity < 0.8:
        score += 1.0
    else:
        score += 2.0

    # ensure score is between 0 and 5
    return min(max(score, 0.0), 5.0)


# Manual check: grade a set of hand-written answers to one question.
question = "What is the difference between an array declared as static, and one that is not?"
desired_answer = "The arrays declared as static live throughout the life of the program; that is, they are initialized only once, when the function that declares the array it is first called."
student_answer = "a static array will store the new values that were assigned to each of its elements. meaning if you call a function twice it will use the last values that were returned the first time.  if you don't declare it static then the new values will not be stored and will be reset to their original value"
new_answer = "adt adt adt adt adt adt"
newAnswer = "An array declared as static has a fixed size and retains its value between function calls, while a non-static array does not retain its value and its size can change during runtime."

irrelevant_answers = [
    new_answer,
    "I don't know",
    "static one stays while non static one leaves",
    "one is memory allocated while other is storage allocated",
    "The static array is like a classic car that never goes out of style, while the non-static array is like a trendy new smartphone that quickly becomes outdated.",
    "static array, non-static array, static array non-static array static array non-static array static array non-static array static array non-static array static array non-static array ",
    "An array is something which starts with square brackets and all values inside it are seaperated by commas thats why it is static",
    "The static array is something that is not non static. Thats why it is called as static array.",
]

relevant_answers = [
    student_answer,
    newAnswer,
    "A static array persists throughout the lifetime of a program, while a non-static array is deallocated once it goes out of scope.",
    "A static array is initialized to zero by default, while a non-static array has an undefined initial value unless explicitly initialized.",
    "A static array can be accessed from any function within a file, while a non-static array is only accessible within the function in which it is declared.",
    "A static array can be initialized only once, at compile time, while a non-static array can be initialized multiple times, including at runtime.",
    "A static array can have a fixed size that is determined at compile time, while a non-static array can have a variable size that is determined at runtime.",
]

for heading, answers in (
    ("=================== Irrelevant answers ==========================", irrelevant_answers),
    ("=================== relevant answers ==========================", relevant_answers),
):
    print(heading)
    for answer in answers:
        # predict_relevance prints its own diagnostics before returning.
        print(predict_relevance(question, desired_answer, answer))
        print()


In [None]:
# import required libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity
import re
import nltk
nltk.download('punkt')
import tensorflow_hub as hub
nltk.download('wordnet')
nltk.download('stopwords')
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))

from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Normalize raw text into a space-joined string of lemmatized tokens.

    The text is lowercased, every character that is neither a word character
    nor whitespace is removed, and digit runs are removed.  The result is
    tokenized with ``nltk.word_tokenize``; tokens present in ``stop_words``
    are dropped and the remaining ones are lemmatized with ``lemmatizer``.
    """
    without_punct = re.sub(r'[^\w\s]', '', text.lower())
    cleaned = re.sub(r'\d+', '', without_punct)

    kept = []
    for word in nltk.word_tokenize(cleaned):
        # The stopword check is applied to the token before lemmatization.
        if word in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(word))
    return ' '.join(kept)


# load the dataset
data = pd.read_csv('dataset.csv')

# Embed every preprocessed "question + student answer" pair with the
# Universal Sentence Encoder; each row of X is one flattened embedding.
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
X = np.array([embed([preprocess_text(q + ' ' + a)]).numpy().flatten() for q, a in zip(data['question'], data['student_answer'])])
y = data['score_avg']

# Hold out 20% of the rows for testing.  The split is seeded so the reported
# metrics are reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# create the model and fit it to the training data
model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)
model.fit(X_train, y_train)

# evaluate the model on the testing set
y_pred = model.predict(X_test)

# compute evaluation metrics
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
# RMSE is derived from the MSE rather than via `squared=False`, which was
# deprecated in scikit-learn 1.4 and is not accepted by later releases.
rmse = np.sqrt(mse)

# print evaluation metrics
print('R-squared:', r2)
print('Mean absolute error:', mae)
print('Mean squared error:', mse)
print('Root mean squared error:', rmse)

# deploy the model to production
def predict_relevance(question, desired_answer, student_answer):
    """Score a student answer on a 0-5 scale.

    The regression model predicts a base score from the Universal Sentence
    Encoder embedding of the preprocessed question and student answer.  That
    score is then shifted according to the cosine similarity between the
    embeddings of the desired answer and the student answer, and finally
    clamped to the range [0, 5].

    The raw model score and the cosine similarity are printed as diagnostics.

    Args:
        question: Text of the question.
        desired_answer: Reference answer to compare against.
        student_answer: Answer to be graded.

    Returns:
        float: The adjusted score, between 0.0 and 5.0 inclusive.
    """
    # preprocess the text
    question = preprocess_text(question)
    desired_answer = preprocess_text(desired_answer)
    student_answer = preprocess_text(student_answer)

    # encode the concatenated question and student answer, mirroring how the
    # training features were built
    input_text = question + ' ' + student_answer
    input_emb = embed([input_text]).numpy().flatten()

    # predict the relevance score of the input text
    raw_score = model.predict([input_emb])[0]
    print(raw_score)

    # compute cosine similarity between desired_answer and student_answer texts
    similarity = cosine_similarity(embed([desired_answer]).numpy(), embed([student_answer]).numpy())[0][0]
    print("Cosine ", similarity)

    # Work on a plain Python float so the function returns the same type
    # whether or not the clamp below takes effect.
    score = float(raw_score)

    # adjust score based on similarity
    if similarity < 0.4:
        score -= 1.0
    elif similarity < 0.8:
        score += 1.0
    else:
        score += 2.0

    # ensure score is between 0 and 5
    return min(max(score, 0.0), 5.0)


# Manual check: grade a set of hand-written answers to one question.
question = "What is the difference between an array declared as static, and one that is not?"
desired_answer = "The arrays declared as static live throughout the life of the program; that is, they are initialized only once, when the function that declares the array it is first called."
student_answer = "a static array will store the new values that were assigned to each of its elements. meaning if you call a function twice it will use the last values that were returned the first time.  if you don't declare it static then the new values will not be stored and will be reset to their original value"
new_answer = "adt adt adt adt adt adt"
newAnswer = "An array declared as static has a fixed size and retains its value between function calls, while a non-static array does not retain its value and its size can change during runtime."

irrelevant_answers = [
    new_answer,
    "I don't know",
    "static one stays while non static one leaves",
    "one is memory allocated while other is storage allocated",
    "The static array is like a classic car that never goes out of style, while the non-static array is like a trendy new smartphone that quickly becomes outdated.",
    "static array, non-static array, static array non-static array static array non-static array static array non-static array static array non-static array static array non-static array ",
    "An array is something which starts with square brackets and all values inside it are seaperated by commas thats why it is static",
    "The static array is something that is not non static. Thats why it is called as static array.",
]

relevant_answers = [
    student_answer,
    newAnswer,
    "A static array persists throughout the lifetime of a program, while a non-static array is deallocated once it goes out of scope.",
    "A static array is initialized to zero by default, while a non-static array has an undefined initial value unless explicitly initialized.",
    "A static array can be accessed from any function within a file, while a non-static array is only accessible within the function in which it is declared.",
    "A static array can be initialized only once, at compile time, while a non-static array can be initialized multiple times, including at runtime.",
    "A static array can have a fixed size that is determined at compile time, while a non-static array can have a variable size that is determined at runtime.",
]

for heading, answers in (
    ("=================== Irrelevant answers ==========================", irrelevant_answers),
    ("=================== relevant answers ==========================", relevant_answers),
):
    print(heading)
    for answer in answers:
        # predict_relevance prints its own diagnostics before returning.
        print(predict_relevance(question, desired_answer, answer))
        print()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


R-squared: 0.3568007295451401
Mean absolute error: 0.6416389395914556
Mean squared error: 0.8269381597435779
Root mean squared error: 0.9093614021628463
2.897167617455096
Cosine  0.14655071
1.897167617455096

1.453086525489041
Cosine  -0.016367897
0.45308652548904105

2.870634974206055
Cosine  0.2534617
1.8706349742060548

2.780173678593783
Cosine  0.32066128
1.780173678593783

3.68748518799407
Cosine  0.3012029
2.68748518799407

3.2204192687397604
Cosine  0.56606376
4.220419268739761

2.4229418097225444
Cosine  0.506612
3.4229418097225444

3.9501894883596687
Cosine  0.56753397
4.950189488359669

4.9001978594859
Cosine  0.46199876
5

4.320114097287814
Cosine  0.5833217
5

4.378520515014515
Cosine  0.70521516
5

4.448256202369251
Cosine  0.6507589
5

3.875730990502816
Cosine  0.60621214
4.8757309905028166

4.129722257868863
Cosine  0.6724781
5

4.871960606126379
Cosine  0.52879477
5



In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.27.3-py3-none-any.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m87.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m103.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.3-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.3 tokenizers-0.13.2 transformers-4.27.3


In [None]:
# import required libraries
import pandas as pd
import numpy as np
import re
import tensorflow_hub as hub
import tensorflow as tf
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

stop_words = set(stopwords.words('english'))

# preprocess text function
def preprocess_text(text):
    """Normalize raw text into a space-joined string of non-stopword tokens.

    The text is lowercased, every character that is neither a word character
    nor whitespace is removed, and digit runs are removed.  The result is
    split on whitespace and tokens present in ``stop_words`` are dropped.
    """
    without_punct = re.sub(r'[^\w\s]', '', text.lower())
    cleaned = re.sub(r'\d+', '', without_punct)
    return ' '.join(word for word in cleaned.split() if word not in stop_words)

# load the dataset
data = pd.read_csv('dataset.csv')

# The Keras model below embeds raw strings itself, so X holds the preprocessed
# "question + student answer" texts.  `embed` is loaded separately for the
# cosine-similarity step in predict_relevance.
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
X = np.array([preprocess_text(q + ' ' + a) for q, a in zip(data['question'], data['student_answer'])])
y = data['score_avg']

# Hold out 20% of the rows for testing.  The split is seeded so the test set
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# define the LSTM model: a frozen Universal Sentence Encoder layer feeds two
# stacked bidirectional LSTMs and a single linear output unit
model = tf.keras.Sequential([
    hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder/4", input_shape=[], dtype=tf.string, trainable=False),
    tf.keras.layers.Reshape((512,), input_shape=(1, 512)),
    # Repeat the sentence embedding 32 times to give the LSTMs a sequence input.
    tf.keras.layers.RepeatVector(32),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(256, activation='relu', return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, activation='relu')),
    tf.keras.layers.Dense(1, activation='linear')
])

# compile the model
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_absolute_error', 'mean_squared_error'])

# define early stopping
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# fit the model to the training data with early stopping
history = model.fit(X_train, y_train, validation_split=0.2, epochs=10, batch_size=16, callbacks=[early_stop])

# evaluate the model on the testing set
y_pred = model.predict(X_test)

# compute evaluation metrics
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
# RMSE is derived from the MSE rather than via `squared=False`, which was
# deprecated in scikit-learn 1.4 and is not accepted by later releases.
rmse = np.sqrt(mse)

# print evaluation metrics
print('R-squared:', r2)
print('Mean absolute error:', mae)
print('Mean squared error:', mse)
print('Root mean squared error:', rmse)

# deploy the model to production
def predict_relevance(question, desired_answer, student_answer):
    """Score a student answer on a 0-5 scale.

    The Keras model predicts a base score from the preprocessed question and
    student answer text.  That score is then shifted according to the cosine
    similarity between the Universal Sentence Encoder embeddings of the
    desired answer and the student answer, and finally clamped to [0, 5].

    The raw model score and the cosine similarity are printed as diagnostics.

    Args:
        question: Text of the question.
        desired_answer: Reference answer to compare against.
        student_answer: Answer to be graded.

    Returns:
        float: The adjusted score, between 0.0 and 5.0 inclusive.
    """
    # preprocess the text
    question = preprocess_text(question)
    desired_answer = preprocess_text(desired_answer)
    student_answer = preprocess_text(student_answer)

    # concatenate question and student_answer texts
    input_text = question + ' ' + student_answer

    # predict the relevance score of the input text
    raw_score = model.predict([input_text])[0][0]
    print(raw_score)

    # compute cosine similarity between desired_answer and student_answer texts
    similarity = cosine_similarity(embed([desired_answer]).numpy(), embed([student_answer]).numpy())[0][0]
    print("Cosine ", similarity)

    # Convert the NumPy scalar to a plain Python float before adjusting it, so
    # the arithmetic and the returned type do not depend on NumPy's scalar
    # promotion rules or on whether the clamp below takes effect.
    score = float(raw_score)

    # adjust score based on similarity
    if similarity < 0.4:
        score -= 1.0
    elif similarity < 0.8:
        score += 1.0
    else:
        score += 2.0

    # ensure score is between 0 and 5
    return min(max(score, 0.0), 5.0)

# Manual check: grade a set of hand-written answers to one question.
question = "What is the difference between an array declared as static, and one that is not?"
desired_answer = "The arrays declared as static live throughout the life of the program; that is, they are initialized only once, when the function that declares the array it is first called."
student_answer = "a static array will store the new values that were assigned to each of its elements. meaning if you call a function twice it will use the last values that were returned the first time.  if you don't declare it static then the new values will not be stored and will be reset to their original value"
new_answer = "adt adt adt adt adt adt"
newAnswer = "An array declared as static has a fixed size and retains its value between function calls, while a non-static array does not retain its value and its size can change during runtime."

irrelevant_answers = [
    new_answer,
    "I don't know",
    "static one stays while non static one leaves",
    "one is memory allocated while other is storage allocated",
    "The static array is like a classic car that never goes out of style, while the non-static array is like a trendy new smartphone that quickly becomes outdated.",
    "static array, non-static array, static array non-static array static array non-static array static array non-static array static array non-static array static array non-static array ",
    "An array is something which starts with square brackets and all values inside it are seaperated by commas thats why it is static",
    "The static array is something that is not non static. Thats why it is called as static array.",
]

relevant_answers = [
    student_answer,
    newAnswer,
    "A static array persists throughout the lifetime of a program, while a non-static array is deallocated once it goes out of scope.",
    "A static array is initialized to zero by default, while a non-static array has an undefined initial value unless explicitly initialized.",
    "A static array can be accessed from any function within a file, while a non-static array is only accessible within the function in which it is declared.",
    "A static array can be initialized only once, at compile time, while a non-static array can be initialized multiple times, including at runtime.",
    "A static array can have a fixed size that is determined at compile time, while a non-static array can have a variable size that is determined at runtime.",
]

for heading, answers in (
    ("=================== Irrelevant answers ==========================", irrelevant_answers),
    ("=================== relevant answers ==========================", relevant_answers),
):
    print(heading)
    for answer in answers:
        # predict_relevance prints its own diagnostics before returning.
        print(predict_relevance(question, desired_answer, answer))
        print()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
R-squared: 0.34297537002504863
Mean absolute error: 0.6165607263753702
Mean squared error: 0.8122217063227222
Root mean squared error: 0.9012334360878552
1.5764737
Cosine  0.15621793
0.5764737129211426

1.6514018
Cosine  0.0060708523
0.6514017581939697

2.4003417
Cosine  0.2627005
1.4003417491912842

3.8555293
Cosine  0.31882492
2.855529308319092

2.5849066
Cosine  0.30789843
1.5849065780639648

3.1605184
Cosine  0.55918676
4.160518407821655

2.9564688
Cosine  0.45380533
3.9564688205718994

3.056217
Cosine  0.55349475
4.0562169551849365

3.4422517
Cosine  0.477167
4.442251682281494

3.6194117
Cosine  0.5726348
4.6194117069244385

3.8958628
Cosine  0.7027976
4.895862817764282

4.092353
Cosine  0.64588
5

3.013603
Cosine  0.58219373
4.01360297203064

4.305527
Cosine  0.69239616
5

3.8054116
Cosine  0.52618045
4.8054115772247314



In [None]:
# import required libraries
import pandas as pd
import numpy as np
import re
import tensorflow_hub as hub
import tensorflow as tf
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from tensorflow.keras import regularizers

stop_words = set(stopwords.words('english'))

# preprocess text function
def preprocess_text(text):
    """Normalize raw text into a space-joined string of non-stopword tokens.

    The text is lowercased, every character that is neither a word character
    nor whitespace is removed, and digit runs are removed.  The result is
    split on whitespace and tokens present in ``stop_words`` are dropped.
    """
    without_punct = re.sub(r'[^\w\s]', '', text.lower())
    cleaned = re.sub(r'\d+', '', without_punct)
    return ' '.join(word for word in cleaned.split() if word not in stop_words)

# load the dataset
data = pd.read_csv('dataset.csv')

# The Keras model below embeds raw strings itself, so X holds the preprocessed
# "question + student answer" texts.  `embed` is loaded separately for the
# cosine-similarity step in predict_relevance.
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
X = np.array([preprocess_text(q + ' ' + a) for q, a in zip(data['question'], data['student_answer'])])
y = data['score_avg']

# Hold out 20% of the rows for testing.  The split is seeded so the test set
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# define the LSTM model: a frozen Universal Sentence Encoder layer feeds two
# stacked, L2-regularized bidirectional LSTMs and a single linear output unit
model = tf.keras.Sequential([
    hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder/4", input_shape=[], dtype=tf.string, trainable=False),
    tf.keras.layers.Reshape((512,), input_shape=(1, 512)),
    # Repeat the sentence embedding 32 times to give the LSTMs a sequence input.
    tf.keras.layers.RepeatVector(32),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(256, activation='relu', return_sequences=True, kernel_regularizer=regularizers.l2(0.01))),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, activation='relu', kernel_regularizer=regularizers.l2(0.01))),
    tf.keras.layers.Dense(1, activation='linear')
])

# compile the model
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_absolute_error', 'mean_squared_error'])

# define early stopping
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# fit the model to the training data with early stopping
history = model.fit(X_train, y_train, validation_split=0.2, epochs=10, batch_size=16, callbacks=[early_stop])

# evaluate the model on the testing set
y_pred = model.predict(X_test)

# compute evaluation metrics
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
# RMSE is derived from the MSE rather than via `squared=False`, which was
# deprecated in scikit-learn 1.4 and is not accepted by later releases.
rmse = np.sqrt(mse)

# print evaluation metrics
print('R-squared:', r2)
print('Mean absolute error:', mae)
print('Mean squared error:', mse)
print('Root mean squared error:', rmse)

# deploy the model to production
def predict_relevance(question, desired_answer, student_answer):
    """Score a student answer on a 0-5 scale.

    The Keras model predicts a base score from the preprocessed question and
    student answer text.  That score is then shifted according to the cosine
    similarity between the Universal Sentence Encoder embeddings of the
    desired answer and the student answer, and finally clamped to [0, 5].

    The raw model score and the cosine similarity are printed as diagnostics.

    Args:
        question: Text of the question.
        desired_answer: Reference answer to compare against.
        student_answer: Answer to be graded.

    Returns:
        float: The adjusted score, between 0.0 and 5.0 inclusive.
    """
    # preprocess the text
    question = preprocess_text(question)
    desired_answer = preprocess_text(desired_answer)
    student_answer = preprocess_text(student_answer)

    # concatenate question and student_answer texts
    input_text = question + ' ' + student_answer

    # predict the relevance score of the input text
    raw_score = model.predict([input_text])[0][0]
    print(raw_score)

    # compute cosine similarity between desired_answer and student_answer texts
    similarity = cosine_similarity(embed([desired_answer]).numpy(), embed([student_answer]).numpy())[0][0]
    print("Cosine ", similarity)

    # Convert the NumPy scalar to a plain Python float before adjusting it, so
    # the arithmetic and the returned type do not depend on NumPy's scalar
    # promotion rules or on whether the clamp below takes effect.
    score = float(raw_score)

    # adjust score based on similarity
    if similarity < 0.4:
        score -= 1.0
    elif similarity < 0.8:
        score += 1.0
    else:
        score += 2.0

    # ensure score is between 0 and 5
    return min(max(score, 0.0), 5.0)

# Manual check: grade a set of hand-written answers to one question.
question = "What is the difference between an array declared as static, and one that is not?"
desired_answer = "The arrays declared as static live throughout the life of the program; that is, they are initialized only once, when the function that declares the array it is first called."
student_answer = "a static array will store the new values that were assigned to each of its elements. meaning if you call a function twice it will use the last values that were returned the first time.  if you don't declare it static then the new values will not be stored and will be reset to their original value"
new_answer = "adt adt adt adt adt adt"
newAnswer = "An array declared as static has a fixed size and retains its vealue between function calls, while a non-static array does not retain its value and its size can change during runtime."

irrelevant_answers = [
    new_answer,
    "I don't know",
    "static one stays while non static one leaves",
    "one is memory allocated while other is storage allocated",
    "The static array is like a classic car that never goes out of style, while the non-static array is like a trendy new smartphone that quickly becomes outdated.",
    "static array, non-static array, static array non-static array static array non-static array static array non-static array static array non-static array static array non-static array ",
    "An array is something which starts with square brackets and all values inside it are seaperated by commas thats why it is static",
    "The static array is something that is not non static. Thats why it is called as static array.",
]

relevant_answers = [
    student_answer,
    newAnswer,
    "A static array persists throughout the lifetime of a program, while a non-static array is deallocated once it goes out of scope.",
    "A static array is initialized to zero by default, while a non-static array has an undefined initial value unless explicitly initialized.",
    "A static array can be accessed from any function within a file, while a non-static array is only accessible within the function in which it is declared.",
    "A static array can be initialized only once, at compile time, while a non-static array can be initialized multiple times, including at runtime.",
    "A static array can have a fixed size that is determined at compile time, while a non-static array can have a variable size that is determined at runtime.",
]

for heading, answers in (
    ("=================== Irrelevant answers ==========================", irrelevant_answers),
    ("=================== relevant answers ==========================", relevant_answers),
):
    print(heading)
    for answer in answers:
        # predict_relevance prints its own diagnostics before returning.
        print(predict_relevance(question, desired_answer, answer))
        print()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
R-squared: 0.22154515177082867
Mean absolute error: 0.7521212661659324
Mean squared error: 0.9042577720454509
Root mean squared error: 0.9509246931515928
2.1331456
Cosine  0.15621793
1.1331455707550049

2.6106246
Cosine  0.006070841
1.6106245517730713

2.759438
Cosine  0.2627005
1.7594380378723145

3.5820034
Cosine  0.31882495
2.582003355026245

2.6903641
Cosine  0.30789843
1.690364122390747

3.4710207
Cosine  0.5591867
4.471020698547363

3.0040102
Cosine  0.45380533
4.004010200500488

3.2498345
Cosine  0.55349475
4.2498345375061035

3.8645263
Cosine  0.47716698
4.864526271820068

3.968431
Cosine  0.59303045
4.968430995941162

3.8104968
Cosine  0.70279765
4.810496807098389

3.9276307
Cosine  0.6458799
4.927630662918091

3.9344177
Cosine  0.58219373
4.934417724609375

3.5272882
Cosine  0.6923961
4.527288198471069

3.9466922
Cosine  0.5261805
4.946692228317261

