**Installation of required libraries**

In [1]:
!pip install pyaudio



In [2]:
!pip install SpeechRecognition



In [3]:
!pip install pywhatkit



In [4]:
!pip install nltk



**Recording the audio and converting it into text**

In [5]:
#Importing the libraries for conversion of audio to text
import speech_recognition as sr
import IPython

In [6]:
# Initialize recognizer
r = sr.Recognizer()

# Reset the transcript first: if recognition fails below, `text` must not keep
# a value left over from an earlier run of this cell. None means "no transcript".
text = None

# Capture audio from microphone
with sr.Microphone() as source:
    print("Speak something to record the audio...")
    audio = r.listen(source)

# Convert audio to text with recognize_google
try:
    text = r.recognize_google(audio)
    print("You said: " + text)
except sr.UnknownValueError:
    # The audio was captured but no words could be recognised in it
    print("Sorry, I could not understand what you said")
except sr.RequestError as err:
    # The recognition request itself failed; show the reason as well
    print("Sorry, my speech service is currently down")
    print(f"Details: {err}")

Could not import the PyAudio C module 'pyaudio._portaudio'.


AttributeError: Could not find PyAudio; check installation

In [None]:
#keeping the transcript under the name `l`, which the prediction cell reads as its input text
l=text

**Cleaning the dataset**

In [None]:
#importing required libraries for cleaning the dataset (removing NaN values, stopwords)
import pandas as pd
import re
import nltk
from tqdm import tqdm
# Registers the progress_apply method on pandas objects; clean_dataframe
# below relies on it to show a progress bar while cleaning.
tqdm.pandas()

In [None]:
#downloading the NLTK 'stopwords' corpus, which STOP_WORDS is built from
nltk.download('stopwords')

In [None]:
#reading the raw dataset from disk (the file is decoded as ISO-8859-1)
data = pd.read_csv(
    "Final_dataset.csv",
    encoding="ISO-8859-1",
)

In [None]:
# Stop words to strip from every sentence, plus the token "br".
# Only the English list is used: calling stopwords.words() with no argument
# returns the lists of every language in the corpus, which also strips English
# words that are stop words elsewhere (e.g. German "die", "will").
# NOTE(review): assumes the recordings are in English - confirm against the dataset.
# A frozenset makes each `word in STOP_WORDS` check constant-time.
STOP_WORDS = frozenset(nltk.corpus.stopwords.words('english')) | {"br"}

#creating a function for cleaning the sentences in the dataset

# Matches runs of characters that are neither whitespace nor word characters,
# plus underscores. Raw string so that \s and \w are regex escapes rather than
# invalid Python string escapes.
_NON_WORD_RE = re.compile(r'([^\s\w]|_)+')


def clean_sentence(val, stop_words=None):
    """Strip punctuation, lowercase, and drop stop words from one sentence.

    Args:
        val: the sentence to clean (a string).
        stop_words: optional collection of lowercase words to drop. When
            omitted, the module-level STOP_WORDS is used.

    Returns:
        The cleaned sentence. Words are split on single spaces and re-joined
        with single spaces, so runs of spaces in the input are preserved.
    """
    if stop_words is None:
        stop_words = STOP_WORDS

    lowered = _NON_WORD_RE.sub('', val).lower()

    # Keep every token that is not a stop word, in its original order
    return " ".join(word for word in lowered.split(" ") if word not in stop_words)

#creating a function to drop NaNs, then apply the 'clean_sentence' function to Recordings
def clean_dataframe(data):
    """Return a cleaned copy of `data`.

    Rows containing a missing value in any column are dropped, then every
    value of the 'Recording' column is passed through clean_sentence.
    Uses progress_apply, which is only available after tqdm.pandas() has run.

    Args:
        data: DataFrame with a 'Recording' column of strings.

    Returns:
        A new DataFrame; the frame passed in is left unmodified.
    """
    # Explicit copy: assigning a column on the frame returned by dropna can
    # raise SettingWithCopyWarning, and the caller's frame must not be touched.
    data = data.dropna(how="any").copy()

    for col in ['Recording']:
        data[col] = data[col].progress_apply(clean_sentence)

    return data

# Replace the raw frame with its cleaned version
data = clean_dataframe(data)

In [None]:
#creating new csv file for the cleaned data
# index=False: otherwise the row index is written as an extra unnamed column,
# which comes back as 'Unnamed: 0' when the file is read again.
data.to_csv('dataset_clean.csv', index=False)

**Detecting the encoding of the cleaned dataset**

In [None]:
#chardet is used to guess the text encoding of the cleaned data file
import chardet

# Read the whole file as raw bytes, then let chardet guess its encoding
with open('dataset_clean.csv', 'rb') as f:
    raw_bytes = f.read()
result = chardet.detect(raw_bytes)
encoding = result['encoding']

**Training different ML models on the data**

In [None]:
#importing the required libraries to ready the data for modeling
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

In [None]:
# Read the cleaned dataset back and discard rows with any missing value
df = pd.read_csv('dataset_clean.csv', encoding=encoding).dropna(how="any")

# Inputs come from the 'Recording' column, labels from the 'Threat' column
X, y = df['Recording'], df['Threat']

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Bag-of-words encoding: the vocabulary is learned from the training text only,
# then the same vocabulary is used to encode the test text
vectorizer = CountVectorizer()
X_train, X_test = vectorizer.fit_transform(X_train), vectorizer.transform(X_test)

**Logistic regression**

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression model on the training split
clf_lr = LogisticRegression().fit(X_train, y_train)

# Predicted labels for the test split
y_pred_lr = clf_lr.predict(X_test)

**Decision tree classifier**

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fit an entropy-criterion decision tree on the training split (fixed seed)
clf_dt = DecisionTreeClassifier(
    criterion='entropy',
    random_state=42,
).fit(X_train, y_train)

# Predicted labels for the test split
y_pred_dt = clf_dt.predict(X_test)

**Random forest classifier**

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a 100-tree random forest on the training split (fixed seed)
clf_rf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predicted labels for the test split
y_pred_rf = clf_rf.predict(X_test)

**Naive Bayes classifier**

In [None]:
from sklearn.naive_bayes import MultinomialNB

# Fit a multinomial Naive Bayes model on the training split
clf_nb = MultinomialNB().fit(X_train, y_train)

# Predicted labels for the test split
y_pred_nb = clf_nb.predict(X_test)

**Making the confusion matrix**

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns


def plot_confusion_matrix(y_true, y_pred, model_name):
    """Compute one model's confusion matrix and draw it as a heatmap.

    Args:
        y_true: actual labels of the test rows.
        y_pred: labels predicted by the model for the same rows.
        model_name: text appended to 'Confusion Matrix of ' in the title.

    Returns:
        (cm, cm_df): the raw confusion matrix and the labelled DataFrame
        that was plotted.
    """
    # Use the label values actually present rather than a hard-coded pair, so
    # the axes describe this dataset and the frame always matches the matrix size.
    class_labels = sorted(set(y_true) | set(y_pred))
    cm = confusion_matrix(y_true, y_pred, labels=class_labels)

    # Ticks show the raw values of the 'Threat' label column
    tick_labels = [f"Threat = {label}" for label in class_labels]
    cm_df = pd.DataFrame(cm, index=tick_labels, columns=tick_labels)

    fig, ax = plt.subplots(figsize=(6, 4))
    sns.heatmap(cm_df, annot=True, cmap='Blues', fmt='d', ax=ax)
    ax.set_title(f'Confusion Matrix of {model_name}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    plt.show()
    return cm, cm_df


#Making the confusion matrix for logistic regression model
cm_lr, cm_df = plot_confusion_matrix(y_test, y_pred_lr, 'logistic regression model')

#Making the confusion matrix for decision tree classifier
cm_dt, cm_df = plot_confusion_matrix(y_test, y_pred_dt, 'decision tree classifier')

#Making the confusion matrix for random forest classifier
cm_rf, cm_df = plot_confusion_matrix(y_test, y_pred_rf, 'random forest classifier')

#Making the confusion matrix for Naive Bayes classifier
cm_nb, cm_df = plot_confusion_matrix(y_test, y_pred_nb, 'Naive Bayes classifier')


**Calculating the accuracy of the trained models**

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of each model on the test split
acc_lr = accuracy_score(y_test, y_pred_lr)   # logistic regression
acc_dt = accuracy_score(y_test, y_pred_dt)   # decision tree
acc_rf = accuracy_score(y_test, y_pred_rf)   # random forest
acc_nb = accuracy_score(y_test, y_pred_nb)   # Naive Bayes

# Report every score as a percentage
print('Accuracy of logistic regression model :', acc_lr*100)
print('Accuracy of decision tree classifier :', acc_dt*100)
print('Accuracy of random forest classifier :', acc_rf*100)
print('Accuracy of Naive Bayes classifier :', acc_nb*100)

**Classifying the text transcribed from the recorded audio**

In [None]:
raw_text = l
# The training text went through clean_sentence before it was vectorized, so
# the transcript must get the same cleaning; otherwise tokens such as "don't"
# are split differently than the vocabulary the models were fitted on.
raw_text_bow = vectorizer.transform([clean_sentence(raw_text)])
# Using the logistic regression model to predict the label of the transcript
output = clf_lr.predict(raw_text_bow)[0]
print(output)

**Sending the alert messages through WhatsApp using the pywhatkit library**

In [None]:
#importing the required libraries to send the messages
import pywhatkit
import datetime
#storing the numbers to which you want to send the alert
registernumbers=["---numbers to which you want to send the alert---"]

# Minutes between "now" and the scheduled send time. Two minutes leaves at
# least a full minute of lead time after truncation to whole minutes; pywhatkit
# needs some lead time before the scheduled minute (see its wait_time parameter).
SEND_DELAY_MINUTES = 2

# NOTE(review): the alert fires when the predicted 'Threat' label is 0 - confirm
# that 0 (and not 1) is the value that marks a threatening call in the dataset.
if output==0:
    message = "Threatening call alert"
    for number in registernumbers:
        # Adding a timedelta rolls minutes and hours over correctly, unlike
        # "minute + 1", which yields 60 at xx:59.
        send_at = datetime.datetime.now() + datetime.timedelta(minutes=SEND_DELAY_MINUTES)
        pywhatkit.sendwhatmsg(number, message, send_at.hour, send_at.minute)
else:
    print("it's not a threaten call")