# Neural Network with Keras

- Obtaining and loading data
- Exploring the data
- Preprocessing: text to tf/idf vectors
- Machine learning
    - Split data
    - Pipeline: vectorisation and model fitting
    - Model evaluation
- Apply model (do one prediction)

### Obtaining Data

In [None]:
import pandas as pd

# Read data/mental_health.csv into a DataFrame and preview its first five
# rows (a notebook renders the value of a cell's last expression).
df = pd.read_csv(filepath_or_buffer="data/mental_health.csv")
df.head(n=5)

### Exploration

In [None]:
# Print the DataFrame summary, then show how many rows carry each label value.
df.info()
df["label"].value_counts()

### Data Set Preparation

In [22]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Raw inputs: the "text" column is the feature, the "label" column the target.
X = df["text"]
y = df["label"]

# Targets: map each label to an integer id, then one-hot encode the ids.
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y.to_numpy())
y_encoded_categorical = to_categorical(y_encoded)

# Features: learn the vocabulary and turn every text into a TF-IDF vector.
# NOTE(review): the vectoriser is fitted on all rows, i.e. before the
# train/test split below, so its statistics also see the test texts.
vectoriser = TfidfVectorizer()
X_vectors = vectoriser.fit_transform(X.to_numpy())

# Report the matrix shape (rows, features) and the vocabulary size.
print(X_vectors.shape)
print("nof features: ", len(vectoriser.vocabulary_))


['__' '___' '____' ... 'zzuckerberg' 'zzzz'
 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz']
(27977, 72622)
nof features:  72622


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20 % of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    X_vectors,
    y_encoded_categorical,
    random_state=17,
    test_size=0.2,
)

print(X_train)

### Modelling

In [None]:
from keras import Sequential
from keras.layers import Input, Dense, Dropout

# Derive the layer widths from the training data instead of hard-coding them:
# the TF-IDF vocabulary size (and thus the input width) changes whenever the
# corpus or the vectoriser settings change.
n_features = X_train.shape[1]
n_classes = y_train.shape[1]  # y_train is one-hot: one column per class

# Small feed-forward classifier: TF-IDF vector -> 64 ReLU units -> softmax.
model = Sequential()
model.add(Input(shape=(n_features,)))
model.add(Dense(64, activation="relu"))
model.add(Dense(n_classes, activation="softmax"))

In [None]:
from keras.optimizers import Adam

# Clip every gradient component to [-0.5, 0.5] to keep updates bounded.
optimizer = Adam(clipvalue=0.5)

# The network ends in a softmax layer and the targets are one-hot encoded, so
# categorical cross-entropy is the matching loss. For exactly two classes it
# yields the same value as the previous binary cross-entropy, but it remains
# correct if more classes are added.
model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=["acc"],
)

In [None]:
# Train for 3 epochs in mini-batches of 32 samples.
# NOTE: the held-out test split is also used as validation data here, so the
# reported validation metrics are measured on the test set.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=3,
    batch_size=32,
    verbose=2,
)

### Evaluation

In [None]:
from sklearn import metrics

# Use the trained Keras model (there is no `pipe` object in this notebook).
# model.predict returns one row of class probabilities per sample and y_test
# is one-hot encoded, so both are reduced to class indices with argmax before
# being handed to the scikit-learn metrics, which expect plain label vectors.
probabilities = model.predict(X_test)
predictions = probabilities.argmax(axis=1)
y_true = y_test.argmax(axis=1)

accuracy = metrics.accuracy_score(y_true=y_true, y_pred=predictions)
confusion = metrics.confusion_matrix(y_true=y_true, y_pred=predictions)
print(accuracy)
print(confusion)

### Application

In [None]:
# The network consumes TF-IDF vectors, not raw strings, so the sample text is
# first transformed with the already fitted vectoriser (transform, not
# fit_transform, to keep the training vocabulary).
sample_text = '''
    nothing look forward lifei dont many 
    reasons keep going feel like nothing 
    keeps going next day makes want hang myself
    '''
sample_vector = vectoriser.transform([sample_text]).toarray()

# `a` holds the predicted class probabilities for the single sample.
a = model.predict(sample_vector)
print(a)

# Map the most probable class index back to its original label.
print(encoder.inverse_transform(a.argmax(axis=1)))