# Comparing Different Models

- Obtaining and loading data
- Exploring the data
- Machine learning
    - Split data
    - Two pipelines with different models
    - Model evaluation: compare performances
- Apply each model (make a single prediction)

### Obtaining Data

In [None]:
import pandas as pd

# Location of the dataset, relative to the notebook's working directory.
DATA_PATH = "data/mental_health.csv"

# Load the whole CSV into a DataFrame and preview the first rows.
df = pd.read_csv(DATA_PATH)
df.head()

### Exploration

In [None]:
# Print column dtypes and non-null counts.
df.info()

# Count how many rows carry each label value (shown as the cell output).
df["label"].value_counts()

### Modelling

In [None]:
from sklearn.model_selection import train_test_split

# Features come from the "text" column, targets from the "label" column.
X, y = df["text"], df["label"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=17,
)

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC

# Both pipelines share the same first stage (TF-IDF vectorisation of the raw
# text) and differ only in the classifier that follows it.
#
# The step labels now match the estimator they wrap. Previously the
# MultinomialNB step was labelled "svm" and the LinearSVC step "bayes", which
# made `named_steps[...]` and `step__param` lookups misleading.

# TF-IDF features -> multinomial Naive Bayes classifier.
pipeBayes = Pipeline([
    ("tfidf", TfidfVectorizer()),
    ("bayes", MultinomialNB()),
])
pipeBayes.fit(X_train, y_train)

# TF-IDF features -> linear support vector classifier.
pipeSVM = Pipeline([
    ("tfidf", TfidfVectorizer()),
    ("svm", LinearSVC()),
])
pipeSVM.fit(X_train, y_train)

### Evaluation

In [None]:
from sklearn import metrics

# Evaluate each fitted pipeline on the held-out test split. For each model,
# print the accuracy followed by the confusion matrix (rows are true labels,
# columns are predicted labels).

# --- Naive Bayes pipeline ---
predictionsBayes = pipeBayes.predict(X_test)
accuracyBayes = metrics.accuracy_score(y_test, predictionsBayes)
confusionBayes = metrics.confusion_matrix(y_test, predictionsBayes)
print(accuracyBayes, confusionBayes, sep="\n")

# --- Linear SVM pipeline ---
predictionsSV = pipeSVM.predict(X_test)
accuracySV = metrics.accuracy_score(y_test, predictionsSV)
confusionSV = metrics.confusion_matrix(y_test, predictionsSV)
print(accuracySV, confusionSV, sep="\n")

### Application

In [None]:
# A single example document, fed unchanged to both models so their
# predictions can be compared side by side.
# NOTE(review): the text looks already cleaned (no punctuation or stop
# words) -- presumably matching the format of the dataset; confirm.
sample_post = '''
    nothing look forward lifei dont many 
    reasons keep going feel like nothing 
    keeps going next day makes want hang myself
    '''

# predict() expects an iterable of documents, hence the one-element list.
aBayes = pipeBayes.predict([sample_post])
print(aBayes)

aSVM = pipeSVM.predict([sample_post])
print(aSVM)