In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelBinarizer
import numpy as np
import tensorflow as tf
from sklearn.metrics import classification_report, accuracy_score


In [3]:
data=pd.read_csv("labeled_emails_active_learning.csv")

In [4]:
print(tf.__version__)

2.17.0


In [5]:
data.head

<bound method NDFrame.head of                                                   from  \
0    NDLI Account Activation <ndl-support@iitkgp.ac...   
1       Radhika Patel <radhikapatel.it@charusat.ac.in>   
2       Radhika Patel <radhikapatel.it@charusat.ac.in>   
3    "Gaurang Patel (Classroom)" <no-reply@classroo...   
4             LinkedIn <messages-noreply@linkedin.com>   
..                                                 ...   
480  Bhargav Shobhana <bhargavshobhana.cv@charusat....   
481     Radhika Patel <radhikapatel.it@charusat.ac.in>   
482         "Coursera" <Coursera@m.learn.coursera.org>   
483                      Devpost <support@devpost.com>   
484    Sports CHARUSAT <sports.officer@charusat.ac.in>   

                                                    to  \
0                     Vansh <23dcs056@charusat.edu.in>   
1    21dcse@charusat.edu.in, 22dcse@charusat.edu.in...   
2    23dcse@charusat.edu.in, 23dce@charusat.edu.in,...   
3                             23dcs056@ch

In [6]:
X = data['content']  # Email content
y = data['label']    # Priority/Optional labels

In [8]:
X

0      NDLI account verification  *Dear Vansh ,*  Tha...
1      Dear Students,  Greetings!!!  Kindly note that...
2      ---------- Forwarded message --------- From: S...
3      Notification settings CSE202: Microprocessor a...
4      View Kshitish’s profile: https://www.linkedin....
                             ...                        
480    Dear Students,  An online quiz is planned for ...
481    Regards, Radhika H. Patel, Assistant Professor...
482    Plus, get a special offer from Google        /...
483    ****************************************** Tak...
484    Dear All,  Greeting of CHARUSAT Sports   Repor...
Name: content, Length: 485, dtype: object

In [9]:
# Convert labels to binary format for classification (if multiclass, use one-hot encoding)
lb = LabelBinarizer()
y = lb.fit_transform(y)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert text to TF-IDF features
tfidf = TfidfVectorizer(max_features=1000)
X_train_tfidf = tfidf.fit_transform(X_train).toarray()
X_test_tfidf = tfidf.transform(X_test).toarray()

In [10]:

# Define the model architecture
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, input_shape=(X_train_tfidf.shape[1],), activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')  # For binary classification
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [11]:

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_tfidf, y_train, epochs=10, batch_size=32, validation_data=(X_test_tfidf, y_test))

# Make predictions on the test set
y_pred_prob = model.predict(X_test_tfidf)
y_pred = (y_pred_prob > 0.5).astype(int)

Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.6328 - loss: 0.6680 - val_accuracy: 0.6907 - val_loss: 0.5870
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7343 - loss: 0.5242 - val_accuracy: 0.7113 - val_loss: 0.4810
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7719 - loss: 0.4269 - val_accuracy: 0.7423 - val_loss: 0.3818
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8625 - loss: 0.3348 - val_accuracy: 0.9381 - val_loss: 0.2748
Epoch 5/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9486 - loss: 0.2708 - val_accuracy: 0.9485 - val_loss: 0.1935
Epoch 6/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9774 - loss: 0.1622 - val_accuracy: 0.9588 - val_loss: 0.1265
Epoch 7/10
[1m13/13[0m [32m━━━━━━━━━

In [12]:

# Evaluate the model
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.9896907216494846
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.99        36
           1       1.00      0.98      0.99        61

    accuracy                           0.99        97
   macro avg       0.99      0.99      0.99        97
weighted avg       0.99      0.99      0.99        97



In [13]:
# Convert the model to TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the converted model to a file
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)

print("TensorFlow Lite model exported successfully!")


INFO:tensorflow:Assets written to: C:\Users\vansh\AppData\Local\Temp\tmpwdrmx12d\assets


INFO:tensorflow:Assets written to: C:\Users\vansh\AppData\Local\Temp\tmpwdrmx12d\assets


Saved artifact at 'C:\Users\vansh\AppData\Local\Temp\tmpwdrmx12d'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 1000), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  2343185835088: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2343185834704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2343185835664: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2343185835280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2343185836432: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2343185837584: TensorSpec(shape=(), dtype=tf.resource, name=None)
TensorFlow Lite model exported successfully!


In [1]:
import joblib

# Save the TF-IDF vectorizer to a file
joblib.dump(tfidf, 'tfidf_vectorizer.pkl')


NameError: name 'tfidf' is not defined