### Part 3
Advanced Model. Create the best fake news predictor.
You might consider using a Support Vector Machine, a Naive Bayes Classifier, or a neural network. 
The input features might use more complex text representations, such as TF-IDF weights or continuous word embeddings.
- Report necessary details about your models ensuring full reproducibility.
    - For example, choice of relevant parameters and how you chose them. 
    - Make sure to argue for why you chose this approach over potential alternatives.

In [None]:
import pandas as pd
from scipy import sparse
from scipy.sparse import hstack
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import svm
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer 

In [None]:
# Load the two precomputed sparse feature matrices (presumably built from the
# article text and the article title, judging by the file names).
content = sparse.load_npz('csv/sparse_matrix_for_text.npz')
title = sparse.load_npz('csv/sparse_matrix_for_title.npz')

# Append the title columns to the right of the text columns and persist the
# combined matrix so it can be reloaded without redoing the concatenation.
collected_matrix = sparse.hstack((content, title))
sparse.save_npz("csv/sparse_matrix_for_all.npz", collected_matrix)

# Read the combined feature matrix back from disk, together with the labels.
features = sparse.load_npz('csv/sparse_matrix_for_all.npz')
labels = pd.read_csv('csv/labels.csv')



In [None]:
# The feature matrix is used as-is; the labels are flattened into a 1-D array.
X = features
y = np.ravel(labels)
y  # last expression of the cell: displays the flattened label array

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import ComplementNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report 
from imblearn.over_sampling import RandomOverSampler 

# Hold out 20% of the data as a test set, then split the remaining 80% evenly
# into training and validation sets (40% / 40% of the full data).
# NOTE(review): neither split is stratified - consider stratify=... if the
# class imbalance that motivates the oversampling below is severe.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.5, random_state=0)

# Oversample the training portion only; the validation and test sets are left
# untouched so they keep their original class distribution.
oversampler = RandomOverSampler(random_state=0)
X_train_oversampled, y_train_oversampled = oversampler.fit_resample(X_train, y_train)

# Baseline Complement Naive Bayes classifier with default parameters.
model = ComplementNB()
model.fit(X_train_oversampled, y_train_oversampled)

# Mean accuracy on the oversampled training data and on the validation data.
print(model.score(X_train_oversampled, y_train_oversampled))
print(model.score(X_val, y_val))

# Predict the validation set once and reuse the result for every metric
# (previously the same prediction was computed twice).
y_val_pred = model.predict(X_val)
predictions_NB = y_val_pred.copy()  # kept available under its original name

# accuracy_score expects (y_true, y_pred) in that order.
print("Naive Bayes Accuracy Score -> ",accuracy_score(y_val, y_val_pred)*100)

report = classification_report(y_val, y_val_pred)
print("classification report:")
print(report)

In [None]:
from sklearn.naive_bayes import ComplementNB

# Candidate values for ComplementNB's `alpha` (smoothing) parameter, spaced by
# powers of ten from 1e-5 to 1e4.
param_grid = [0.00001,0.0001,0.001, 0.01,0.1,1,10,100,1000,10000]

# Best alpha seen so far, its validation accuracy, and the matching fitted
# estimator / validation predictions. Starting the score at -inf guarantees
# that the first candidate is always recorded, even if its accuracy is 0.
best_alpha = None
best_score = float("-inf")
best_model = None
best_val_pred = None

for pg in param_grid:
    # Fit a fresh classifier on the oversampled training data for this alpha.
    model = ComplementNB(alpha=pg)
    model.fit(X_train_oversampled, y_train_oversampled)

    # Score the candidate by its accuracy on the validation set.
    y_val_pred = model.predict(X_val)
    score = accuracy_score(y_val, y_val_pred)
    print(f"alpha: {pg}, Accuracy: {score}")

    # Strict comparison: on ties the earlier (smaller) alpha is kept.
    if score > best_score:
        best_alpha = pg
        best_score = score
        best_model = model
        best_val_pred = y_val_pred

print(f"\nBest alpha: {best_alpha}, Best Accuracy: {best_score}")

# Leave `model` / `y_val_pred` referring to the best candidate rather than to
# whichever alpha happened to be tried last.
if best_model is not None:
    model = best_model
    y_val_pred = best_val_pred
    