In [14]:
import pandas as pd

# Read the SMS dataset into a DataFrame, decoding the file as Latin-1.
df = pd.read_csv('spam.csv', encoding='latin1')

# Show the leading rows so the layout of the data is visible.
preview = df.head()
print(preview)

# List the column labels found in the file.
column_labels = df.columns
print(column_labels)


     v1                                                 v2 Unnamed: 2  \
0   ham  Go until jurong point, crazy.. Available only ...        NaN   
1   ham                      Ok lar... Joking wif u oni...        NaN   
2  spam  Free entry in 2 a wkly comp to win FA Cup fina...        NaN   
3   ham  U dun say so early hor... U c already then say...        NaN   
4   ham  Nah I don't think he goes to usf, he lives aro...        NaN   

  Unnamed: 3 Unnamed: 4  
0        NaN        NaN  
1        NaN        NaN  
2        NaN        NaN  
3        NaN        NaN  
4        NaN        NaN  
Index(['v1', 'v2', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], dtype='object')


In [15]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import pickle

# Train a bag-of-words Naive Bayes spam classifier on spam.csv and pickle the
# fitted model together with its vectorizer.

# Step 1: Load Data
# Only 'v1' (the ham/spam tag) and 'v2' (the message text) are used below, so
# the remaining columns of the file are not read at all.
df = pd.read_csv('spam.csv', encoding='latin1', usecols=['v1', 'v2'])

# Step 2: Rename Columns for Clarity
# Renaming by name (instead of assigning df.columns positionally) keeps the
# label/message pairing correct regardless of the column order in the file.
df = df.rename(columns={'v1': 'label', 'v2': 'message'})

# Step 3: Map Labels to Binary Values
# Series.map turns every value that is missing from the dict into NaN. Fail
# here, naming the offending values, rather than later inside model.fit.
label_codes = {'ham': 0, 'spam': 1}
encoded_labels = df['label'].map(label_codes)
unknown_labels = df.loc[encoded_labels.isna(), 'label'].unique().tolist()
if unknown_labels:
    raise ValueError(f"Unexpected label values in column 'v1': {unknown_labels}")
df['label'] = encoded_labels

# Step 4: Prepare the Data
X = df['message']
y = df['label']

# Step 5: Vectorize the Text Data
# The fitted vectorizer defines the vocabulary; the same object must be used
# to transform any text passed to the model later.
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Step 6: Train the Model
model = MultinomialNB()
model.fit(X_vectorized, y)

# Step 7: Save the Model and Vectorizer
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

with open('vectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

print("Model and vectorizer saved successfully.")


Model and vectorizer saved successfully.


In [16]:
import pickle
from sklearn.feature_extraction.text import CountVectorizer

# Step 8: Load the Model and Vectorizer
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# unpickle artifacts that you produced yourself.
with open('model.pkl', 'rb') as model_file, open('vectorizer.pkl', 'rb') as vectorizer_file:
    model = pickle.load(model_file)
    vectorizer = pickle.load(vectorizer_file)

# Step 9: Make Predictions
# The text is wrapped in a list because transform() takes a collection of
# documents; [0] then picks the prediction for that single document.
sample_text = ["Free entry in 2 a wkly comp to win FA Cup fina..."]
vector_input = vectorizer.transform(sample_text)
predictions = model.predict(vector_input)
result = predictions[0]

# A truthy result is reported as spam.
verdict = "Spam" if result else "Not Spam"
print("Prediction:", verdict)


Prediction: Spam


In [17]:
# Classify a recruiting-style message with the model and vectorizer loaded earlier.
sample_text = ["Dear Vidhi Sheth, Hope you're doing well! Exciting career move! Deloitte is recruiting Process Analyst with competitive salary options"]
vector_input = vectorizer.transform(sample_text)
result = model.predict(vector_input)[0]

# A truthy result is reported as spam.
if result:
    verdict = "Spam"
else:
    verdict = "Not Spam"
print("Prediction:", verdict)


Prediction: Not Spam


In [18]:
# Classify a promotional-sounding welcome message.
sample_text = ["Welcome to TechVarsity: The next big thing in tech education"]
vector_input = vectorizer.transform(sample_text)
predictions = model.predict(vector_input)
result = predictions[0]

# Index the pair of display names by the truthiness of the prediction.
verdict = ("Not Spam", "Spam")[bool(result)]
print("Prediction:", verdict)

Prediction: Not Spam


In [19]:
# Classify a casual conversational message.
sample_text = ["Did You see the match? It was insane."]
vector_input = vectorizer.transform(sample_text)
result = model.predict(vector_input)[0]

# A truthy result is reported as spam.
verdict = "Spam" if result else "Not Spam"
print("Prediction:", verdict)

Prediction: Not Spam


In [20]:
# Classify a short personal message.
sample_text = ["I love you. Do you love me ?"]
vector_input = vectorizer.transform(sample_text)
predictions = model.predict(vector_input)
result = predictions[0]

# Pick the display name from the truthiness of the prediction.
if result:
    verdict = "Spam"
else:
    verdict = "Not Spam"
print("Prediction:", verdict)

Prediction: Not Spam


In [21]:
# Classify an unsolicited compensation-claim message (text kept verbatim,
# including its typos, since it is the model input).
sample_text = ["Accident Compensation. Yo have still not claim the compensation you are due for the accident you had. To start thed process please reply yes. To opt out reply stop."]
vector_input = vectorizer.transform(sample_text)
result = model.predict(vector_input)[0]

# A truthy result is reported as spam.
verdict = ("Not Spam", "Spam")[bool(result)]
print("Prediction:", verdict)

Prediction: Spam
