# Importation

In [1]:
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
import seaborn as sns
import matplotlib.pyplot as plt
import spacy


In [2]:
# Load the full corpus; later cells read its 'recette' and 'type' columns.
corpus_path = '../data/corpus.csv'
df = pd.read_csv(corpus_path)

# Séparation test et entrainement

In [3]:
# Hold out 20% of the corpus for testing; the fixed random_state keeps the
# split identical from one run to the next.
split_frames = train_test_split(df, test_size=0.2, random_state=42)
train_df, test_df = split_frames

# Persist both halves so the split can be reloaded from disk.
for frame, path in ((train_df, '../data/train.csv'), (test_df, '../data/test.csv')):
    frame.to_csv(path, index=False)


### Ouvrir les csv qui ont déjà été divisés

In [4]:
# Reload the train and test sets from the CSV files written to ../data.
train_df, test_df = map(pd.read_csv, ('../data/train.csv', '../data/test.csv'))

# Baseline 

### Prédiction aléatoire

In [5]:
def random_prediction(classes=('Plat principal', 'Entrée', 'Dessert'), rng=None):
    """Return one label drawn uniformly at random from ``classes``.

    Args:
        classes: Sequence of candidate labels. Defaults to the three recipe
            types used by this notebook.
        rng: Optional object exposing ``choice`` (e.g. ``random.Random(seed)``)
            for reproducible draws. When None, the global ``random`` module
            is used.

    Returns:
        One element of ``classes``.

    Raises:
        ValueError: if ``classes`` is empty.
    """
    if len(classes) == 0:
        raise ValueError("classes must contain at least one label")
    chooser = random if rng is None else rng
    return chooser.choice(classes)

# Seed the generator so the random baseline is reproducible after a kernel
# restart (same seed value as the train/test split).
random.seed(42)

# One independent draw per test row, assigned positionally.
test_df['random prediction'] = [random_prediction() for _ in range(len(test_df))]


### Prédiction classe majoritaire

In [6]:
def main_class_prediction(train_df):
    """Return the most frequent label found in ``train_df['type']``.

    ``Series.mode()`` returns a Series (with several entries when labels are
    tied), so the first entry is extracted to hand back a single scalar label.

    Args:
        train_df: DataFrame with a 'type' column.

    Returns:
        The first mode of the 'type' column, as a scalar.

    Raises:
        ValueError: if the 'type' column has no non-null value.
    """
    modes = train_df['type'].mode()
    if modes.empty:
        raise ValueError("train_df['type'] has no value to take a mode from")
    return modes.iloc[0]

main_type = main_class_prediction(train_df)

# Give every row of the test set the value computed from the training set.
test_df['main class prediction'] = test_df.apply(lambda _row: main_type, axis=1)

# Model

### Sklearn + TF-IDF

In [7]:
# Training inputs (recipe text) and targets (recipe type).
x_train, y_train = train_df['recette'], train_df['type']

# TF-IDF features fed to a multinomial Naive Bayes classifier; fit() returns
# the fitted pipeline itself.
model1 = make_pipeline(TfidfVectorizer(), MultinomialNB()).fit(x_train, y_train)

# Predict the test recipes and keep the predictions next to the test rows.
y_pred_Sklearn = model1.predict(test_df['recette'])
test_df['Tfi-Df prediction'] = y_pred_Sklearn

# Ground-truth labels, reused by the evaluation cells.
y_test = test_df['type']

### Spacy

In [9]:
nlp = spacy.load("fr_core_news_sm")


def spacy_tokenizer(text):
    """Return the lemmas of ``text``, skipping punctuation and stop words.

    The text is processed with the module-level ``nlp`` pipeline; tokens
    flagged ``is_punct`` or ``is_stop`` are dropped and the ``lemma_`` of each
    remaining token is collected in order.
    """
    lemmas = []
    for token in nlp(text):
        if token.is_punct or token.is_stop:
            continue
        lemmas.append(token.lemma_)
    return lemmas

# TF-IDF over spaCy lemmas, classified with an SVM.
# token_pattern=None: the default regex pattern is ignored once a custom
# tokenizer is supplied, and leaving it set makes scikit-learn emit a
# UserWarning at fit time.
model2 = make_pipeline(
    TfidfVectorizer(tokenizer=spacy_tokenizer, token_pattern=None),
    SVC(),
)

model2.fit(x_train, y_train)

y_pred_spacy = model2.predict(test_df['recette'])




# Résultats

### Random Prediction

In [None]:
y_rand = test_df['random prediction']

# Derive the label list from the data: no variable named `model` exists in
# this notebook, and the random baseline is not tied to any fitted estimator.
# Passing the same list to confusion_matrix guarantees that the matrix rows and
# columns match the DataFrame index/columns.
labels = sorted(set(y_test) | set(y_rand))

conf_matrix = confusion_matrix(y_test, y_rand, labels=labels)

conf_df = pd.DataFrame(conf_matrix, index=labels, columns=labels)

print("Matrice de Confusion : \n", conf_df)

plt.figure(figsize=(8,6))
sns.heatmap(conf_df, annot=True, cmap="YlGnBu", fmt='g')
plt.title('Matrice de Confusion')
plt.xlabel('Prédictions')
plt.ylabel('Valeurs Réelles')
plt.show()

# Recall - Precision - F1-score

report = classification_report(y_test, y_rand)
print(report)

### Main Class Prediction

In [None]:
y_main = test_df['main class prediction']

# Derive the label list from the data: no variable named `model` exists in
# this notebook, and the majority-class baseline is not tied to any fitted
# estimator. Passing the same list to confusion_matrix guarantees that the
# matrix rows and columns match the DataFrame index/columns.
labels = sorted(set(y_test) | set(y_main))

conf_matrix = confusion_matrix(y_test, y_main, labels=labels)

conf_df = pd.DataFrame(conf_matrix, index=labels, columns=labels)

print("Matrice de Confusion : \n", conf_df)

plt.figure(figsize=(8,6))
sns.heatmap(conf_df, annot=True, cmap="YlGnBu", fmt='g')
plt.title('Matrice de Confusion')
plt.xlabel('Prédictions')
plt.ylabel('Valeurs Réelles')
plt.show()

# Recall - Precision - F1-score

# A constant prediction never predicts the other classes, so their precision
# is undefined; zero_division=0 reports 0 for them without emitting warnings.
report = classification_report(y_test, y_main, zero_division=0)
print(report)

### Sklearn + TF-IDF

In [None]:
# Derive the label list from the data: no variable named `model` exists in
# this notebook (the fitted pipelines are `model1` and `model2`). Passing the
# same list to confusion_matrix guarantees that the matrix rows and columns
# match the DataFrame index/columns.
labels = sorted(set(y_test) | set(y_pred_Sklearn))

conf_matrix = confusion_matrix(y_test, y_pred_Sklearn, labels=labels)

conf_df = pd.DataFrame(conf_matrix, index=labels, columns=labels)

print("Matrice de Confusion : \n", conf_df)

plt.figure(figsize=(8,6))
sns.heatmap(conf_df, annot=True, cmap="YlGnBu", fmt='g')
plt.title('Matrice de Confusion')
plt.xlabel('Prédictions')
plt.ylabel('Valeurs Réelles')
plt.show()

# Recall - Precision - F1-score

report = classification_report(y_test, y_pred_Sklearn)
print(report)

### Spacy

In [None]:
# Derive the label list from the data: no variable named `model` exists in
# this notebook (the fitted pipelines are `model1` and `model2`). Passing the
# same list to confusion_matrix guarantees that the matrix rows and columns
# match the DataFrame index/columns.
labels = sorted(set(y_test) | set(y_pred_spacy))

conf_matrix = confusion_matrix(y_test, y_pred_spacy, labels=labels)

conf_df = pd.DataFrame(conf_matrix, index=labels, columns=labels)

print("Matrice de Confusion : \n", conf_df)

plt.figure(figsize=(8,6))
sns.heatmap(conf_df, annot=True, cmap="YlGnBu", fmt='g')
plt.title('Matrice de Confusion')
plt.xlabel('Prédictions')
plt.ylabel('Valeurs Réelles')
plt.show()

# Recall - Precision - F1-score

report = classification_report(y_test, y_pred_spacy)
print(report)