<a href="https://colab.research.google.com/github/yahias21/Extensive_analysis_on_NLP_models_for_sentiment_analysis_in_business_applications/blob/master/Ensemble_modeling_on_sentiment_analysis_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn import metrics
import io
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
from sklearn.model_selection import train_test_split
from statistics import mode
from google.colab import files
# Prompt for a file upload through google.colab's `files` helper.
# The loading cell below reads the entry keyed 'output.csv' from this result,
# so the uploaded file must be named exactly 'output.csv'.
uploaded = files.upload()

In [None]:
# Load the exported base-model predictions from the uploaded CSV.
df1 = pd.read_csv(io.BytesIO(uploaded['output.csv']))

# Ground-truth labels exactly as stored in the 'True' column.
true = df1['True']
# Same labels with -1 re-encoded as 0 (every other value is kept unchanged).
truexg = df1["True"].apply(lambda x: 0 if x==-1 else x)

# Hard sentiment predictions of the three base models.
# This must be defined BEFORE it is used below: the original cell read `df`
# three lines before assigning it, which raises NameError on a fresh kernel.
df = df1[['flairFSent','flairASent','RobSent']]

# Confidence-weighted predictions: sentiment multiplied by the model's confidence.
Rob = df['RobSent'].mul(df1['RobConf'])
FF = df['flairFSent'].mul(df1['flairFConf'])
FA = df['flairASent'].mul(df1['flairAConf'])
# `keys=` labels the resulting columns; the original passed `names=`, which only
# names index levels and therefore left the columns unlabelled.
dfL = pd.concat([Rob,FF,FA],axis=1,keys=['RobSent','flairFSent','flairASent'])


# Base models:

In [None]:
# One classification report per base model, each scored against the ground truth.
for base_model_col in ('RobSent', 'flairFSent', 'flairASent'):
    print(metrics.classification_report(true, df[base_model_col]))


              precision    recall  f1-score   support

          -1       0.97      0.96      0.96     49986
           1       0.96      0.97      0.96     50014

    accuracy                           0.96    100000
   macro avg       0.96      0.96      0.96    100000
weighted avg       0.96      0.96      0.96    100000

              precision    recall  f1-score   support

          -1       0.91      0.96      0.93     49986
           1       0.95      0.90      0.93     50014

    accuracy                           0.93    100000
   macro avg       0.93      0.93      0.93    100000
weighted avg       0.93      0.93      0.93    100000

              precision    recall  f1-score   support

          -1       0.91      0.97      0.94     49986
           1       0.97      0.91      0.94     50014

    accuracy                           0.94    100000
   macro avg       0.94      0.94      0.94    100000
weighted avg       0.94      0.94      0.94    100000



# Stacking:

![Picture](https://www.researchgate.net/profile/Bayu-Adhi-Tama/publication/318260780/figure/fig2/AS:600512453943296@1520184734369/Classifier-ensemble-using-stacking.png)

In [None]:
# Stacking: the base models' sentiment predictions (df) are the features of two
# candidate meta-models.
# The target is `truexg` (the labels with -1 re-encoded as 0) rather than `true`:
# XGBoost's 'reg:logistic' objective only accepts labels inside [0, 1], so fitting
# on the raw -1/1 labels fails. Stratifying on the same series keeps the class
# balance identical in the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(df, truexg, test_size=0.2, random_state=50,stratify=truexg)
lrModel = LogisticRegression()
xgModel = xgb.XGBRegressor(objective ='reg:logistic', colsample_bytree = 0.3, learning_rate = 0.1,
                max_depth = 8, alpha = 10, n_estimators = 10)
lrModel.fit(X_train, y_train)
xgModel.fit(X_train, y_train)

In [None]:
# Logistic-regression meta-model: for a classifier, score() is the mean accuracy.
print(lrModel.score(X_test,y_test))
out=lrModel.predict(X_test)
print(metrics.classification_report(y_test,out))

# XGBoost meta-model: xgModel is a *regressor*, so score() is the R^2 of the
# predicted probabilities, NOT an accuracy -- it cannot be compared with the
# logistic-regression accuracy printed above.
print(xgModel.score(X_test,y_test))
# For a like-for-like comparison, threshold the predicted probabilities at 0.5.
# The resulting 0/1 classes match y_test because 'reg:logistic' can only be fit
# on labels inside [0, 1].
xg_out = (xgModel.predict(X_test) > 0.5).astype(int)
print(metrics.classification_report(y_test, xg_out))

0.96125
              precision    recall  f1-score   support

           0       0.97      0.95      0.96      9997
           1       0.95      0.97      0.96     10003

    accuracy                           0.96     20000
   macro avg       0.96      0.96      0.96     20000
weighted avg       0.96      0.96      0.96     20000

0.7212546926365101


# Cascading:

![picture](https://miro.medium.com/max/2000/1*5RMlhv9U_gPNTnamEwY-uQ.jpeg)

In [None]:
# Cascading: trust the first model in the chain whose confidence exceeds the
# threshold, in the order flairF -> flairA -> RoBERTa (RoBERTa is the fallback).
CONF_THRESHOLD = 0.9

# Vectorised form of the per-row if/else chain: Series.where keeps a value where
# the condition holds and takes the replacement everywhere else.
flairA_or_rob = df1['flairASent'].where(df1['flairAConf'] > CONF_THRESHOLD, df1['RobSent'])
sentiment = (
    df1['flairFSent']
    .where(df1['flairFConf'] > CONF_THRESHOLD, flairA_or_rob)
    .tolist()  # keep `sentiment` a plain list, as before
)
print(metrics.classification_report(true,sentiment))


              precision    recall  f1-score   support

          -1       0.92      0.97      0.94     49986
           1       0.97      0.92      0.94     50014

    accuracy                           0.94    100000
   macro avg       0.94      0.94      0.94    100000
weighted avg       0.94      0.94      0.94    100000



# Voting:

![picture](https://i.stack.imgur.com/W7UmY.png)

In [None]:
# Hard voting: for every sample, take the most common label among the three
# base-model predictions.
ballots = zip(df['flairFSent'], df['flairASent'], df['RobSent'])
vote = [mode(list(ballot)) for ballot in ballots]
print(metrics.classification_report(true,vote))

              precision    recall  f1-score   support

          -1       0.94      0.97      0.95     49986
           1       0.97      0.93      0.95     50014

    accuracy                           0.95    100000
   macro avg       0.95      0.95      0.95    100000
weighted avg       0.95      0.95      0.95    100000



# Conclusions:
- From the results we conclude that the native (base) models are more effective than stacking
- Stacking is the best choice when:
  - The base models are trained on different datasets, i.e. multiple simple, high-bias, low-variance datasets are the best fit for this technique
- Cascading is the best technique for a trade-off between performance and accuracy
- Hard voting (mode) is the least efficient technique
- Logistic regression was chosen following Ockham's Razor: no assumptions were made about the dataset, so the simplest model was the best choice for the meta-model
- Stacking flair and VADER was observed to give great results on the Sentiment140 dataset
# R&D:
- RoBERTa-large (English) is the best model in terms of accuracy
- Due to its superb speed, the flair (RNN) model is the best trade-off between accuracy and performance
    - Training flair on multiple datasets and using the stacking technique will lead to much better results (as flair is 50x faster than RoBERTa-large)
- Aspect extraction and aspect classification (using LDA and TF-IDF with BOW methods) will enrich the results and give great insights to the business user

