# APP RATING DATA SET

## AIM
To predict rating for a mobile app given features like size, number of downloads, etc.

# Importing Libraries

In [240]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [241]:
app_ori = pd.read_csv('Apps_data.csv') 

In [242]:
app_ori.sample(10)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
7053,Dragon BZ Wallpapers HD,PERSONALIZATION,,5,17M,"1,000+",Free,0,Everyone,Personalization,"April 4, 2018",1.11,4.0 and up
9886,taxi.eu,TRAVEL_AND_LOCAL,4.1,1865,22M,"100,000+",Free,0,Everyone,Travel & Local,"June 28, 2018",0.437592593,4.4 and up
7740,Gunship Modern Combat 3D,GAME,4.2,3247,26M,"500,000+",Free,0,Teen,Action,"July 25, 2017",1.05,2.3 and up
7551,Inch/cm/Foot Conversion,PRODUCTIVITY,4.0,319,2.1M,"100,000+",Free,0,Everyone,Productivity,"August 20, 2017",2.00.01,2.3.3 and up
6036,Rabbithole,FAMILY,3.5,3347,15M,"100,000+",Free,0,Everyone,Entertainment,"July 18, 2018",1.1.15.12,4.4 and up
7845,GATE CS Engineering 2019 Exam Prep App,FAMILY,4.2,162,6.2M,"10,000+",Free,0,Everyone,Education,"February 15, 2018",Y4W-GATE_CS-5.0.0,4.1 and up
8686,DP & Status for Whatsapp 2018,PERSONALIZATION,4.1,293,3.4M,"100,000+",Free,0,Everyone,Personalization,"August 3, 2018",1.4,4.0.3 and up
7926,Tech CU Card Manager,FINANCE,1.0,2,7.2M,"1,000+",Free,0,Everyone,Finance,"July 25, 2017",1.00.01,4.0 and up
5697,Total AV,PRODUCTIVITY,3.4,617,46M,"100,000+",Free,0,Everyone,Productivity,"April 17, 2018",1.05.03,4.1 and up
6344,3D DJ – DJ Mixer 2018,FAMILY,4.3,6333,30M,"1,000,000+",Free,0,Everyone,Entertainment,"May 25, 2018",1.01.12,5.0 and up


In [243]:
app_ori.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10841 entries, 0 to 10840
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   App             10841 non-null  object 
 1   Category        10841 non-null  object 
 2   Rating          9367 non-null   float64
 3   Reviews         10841 non-null  object 
 4   Size            10841 non-null  object 
 5   Installs        10841 non-null  object 
 6   Type            10840 non-null  object 
 7   Price           10841 non-null  object 
 8   Content Rating  10840 non-null  object 
 9   Genres          10841 non-null  object 
 10  Last Updated    10841 non-null  object 
 11  Current Ver     10833 non-null  object 
 12  Android Ver     10838 non-null  object 
dtypes: float64(1), object(12)
memory usage: 1.1+ MB


* The Rating column has the highest number of null values and it is the only column of numeric Data Type.
* 'Price', 'Content Rating', 'Current Ver' and 'Android Ver' have null values.
* 'Size', 'Installs', 'Price', 'Last Updated', 'Current Ver' and 'Android Ver' attributes are to be cleaned to change them from Object data type

In [244]:
app_ori.nunique()

App               9660
Category            34
Rating              40
Reviews           6002
Size               462
Installs            22
Type                 3
Price               93
Content Rating       6
Genres             120
Last Updated      1378
Current Ver       2750
Android Ver         33
dtype: int64

In [245]:
app_ori.describe(include='all').T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
App,10841,9660.0,ROBLOX,9.0,,,,,,,
Category,10841,34.0,FAMILY,1972.0,,,,,,,
Rating,9367,,,,4.19334,0.537431,1.0,4.0,4.3,4.5,19.0
Reviews,10841,6002.0,0,596.0,,,,,,,
Size,10841,462.0,Varies with device,1695.0,,,,,,,
Installs,10841,22.0,"1,000,000+",1579.0,,,,,,,
Type,10840,3.0,Free,10039.0,,,,,,,
Price,10841,93.0,0,10040.0,,,,,,,
Content Rating,10840,6.0,Everyone,8714.0,,,,,,,
Genres,10841,120.0,Tools,842.0,,,,,,,


# DATA PREPROCESSING

In [246]:
app_clean = app_ori.copy()
app_clean = app_clean.drop_duplicates(['App'])

In [247]:
app_clean['Reviews'].unique()

array(['159', '967', '87510', ..., '603', '1195', '398307'], dtype=object)

In [248]:
app_clean['Reviews'].replace('3.0M', 3000000, inplace=True)

In [249]:
app_clean['Type'].unique()

array(['Free', 'Paid', nan, '0'], dtype=object)

In [250]:
app_clean['Type'] = app_clean['Type'].replace('NaN', np.nan)
app_clean['Type'] = app_clean['Type'].replace('0', 'Free')

In [251]:
app_clean['Price'].unique()

array(['0', '$4.99', '$3.99', '$6.99', '$1.49', '$2.99', '$7.99', '$5.99',
       '$3.49', '$1.99', '$9.99', '$7.49', '$0.99', '$9.00', '$5.49',
       '$10.00', '$24.99', '$11.99', '$79.99', '$16.99', '$14.99',
       '$1.00', '$29.99', '$12.99', '$2.49', '$10.99', '$1.50', '$19.99',
       '$15.99', '$33.99', '$74.99', '$39.99', '$3.95', '$4.49', '$1.70',
       '$8.99', '$2.00', '$3.88', '$25.99', '$399.99', '$17.99',
       '$400.00', '$3.02', '$1.76', '$4.84', '$4.77', '$1.61', '$2.50',
       '$1.59', '$6.49', '$1.29', '$5.00', '$13.99', '$299.99', '$379.99',
       '$37.99', '$18.99', '$389.99', '$19.90', '$8.49', '$1.75',
       '$14.00', '$4.85', '$46.99', '$109.99', '$154.99', '$3.08',
       '$2.59', '$4.80', '$1.96', '$19.40', '$3.90', '$4.59', '$15.46',
       '$3.04', '$4.29', '$2.60', '$3.28', '$4.60', '$28.99', '$2.95',
       '$2.90', '$1.97', '$200.00', '$89.99', '$2.56', '$30.99', '$3.61',
       '$394.99', '$1.26', 'Everyone', '$1.20', '$1.04'], dtype=object)

In [252]:
app_clean['Price'] = app_clean['Price'].str.replace('$', '')

In [253]:
app_clean['Category'].value_counts()

FAMILY                 1832
GAME                    959
TOOLS                   827
BUSINESS                420
MEDICAL                 395
PERSONALIZATION         376
PRODUCTIVITY            374
LIFESTYLE               369
FINANCE                 345
SPORTS                  325
COMMUNICATION           315
HEALTH_AND_FITNESS      288
PHOTOGRAPHY             281
NEWS_AND_MAGAZINES      254
SOCIAL                  239
BOOKS_AND_REFERENCE     222
TRAVEL_AND_LOCAL        219
SHOPPING                202
DATING                  171
VIDEO_PLAYERS           163
MAPS_AND_NAVIGATION     131
EDUCATION               119
FOOD_AND_DRINK          112
ENTERTAINMENT           102
AUTO_AND_VEHICLES        85
LIBRARIES_AND_DEMO       84
WEATHER                  79
HOUSE_AND_HOME           74
ART_AND_DESIGN           64
EVENTS                   64
PARENTING                60
COMICS                   56
BEAUTY                   53
1.9                       1
Name: Category, dtype: int64

In [254]:
app_clean.Category.replace('1.9' , 'FAMILY', inplace = True)

In [255]:
app_clean['Size'].sample(10)

10745                  5.8M
3163     Varies with device
10481                   43M
1688                    78M
573                     28M
5602     Varies with device
2855                   4.2M
3372     Varies with device
6415                    40M
1394                    18M
Name: Size, dtype: object

In [256]:
app_1 = app_clean.copy()
app_clean['Size'] = app_clean['Size'].str.replace('M','')
app_clean['Size'] = app_clean['Size'].str.replace('k','')
app_1['Size'].replace(['Varies with device','FAILY'], np.nan, inplace =True)
app_clean['Size'].replace(['Varies with device','FAILY'], np.nan, inplace =True)
app_clean['Size'] = pd.to_numeric(app_clean['Size'], errors='coerce')


In [293]:
# Converting all the  M (mb) sizes to k (kb).
indx = [app_1['Size'].str.find('M')>0]
for i in indx:
    app_clean.loc[i, 'Size'] = app_clean.loc[i, 'Size']*1024

In [258]:
app_clean['Installs'].unique()

array(['10,000+', '500,000+', '5,000,000+', '50,000,000+', '100,000+',
       '50,000+', '1,000,000+', '10,000,000+', '5,000+', '100,000,000+',
       '1,000,000,000+', '1,000+', '500,000,000+', '50+', '100+', '500+',
       '10+', '1+', '5+', '0+', '0', 'Free'], dtype=object)

In [259]:
app_clean['Installs'] = app_clean['Installs'].str.replace(',', '')
app_clean['Installs'] = app_clean['Installs'].str.replace('+', '')
app_clean['Installs'] = pd.to_numeric(app_clean['Installs'] , errors='coerce')

In [260]:
app_clean['Content Rating'].unique()

array(['Everyone', 'Teen', 'Everyone 10+', 'Mature 17+',
       'Adults only 18+', 'Unrated', nan], dtype=object)

In [261]:
app_clean['Genres'].unique()

array(['Art & Design', 'Art & Design;Pretend Play',
       'Art & Design;Creativity', 'Art & Design;Action & Adventure',
       'Auto & Vehicles', 'Beauty', 'Books & Reference', 'Business',
       'Comics', 'Comics;Creativity', 'Communication', 'Dating',
       'Education;Education', 'Education', 'Education;Creativity',
       'Education;Music & Video', 'Education;Action & Adventure',
       'Education;Pretend Play', 'Education;Brain Games', 'Entertainment',
       'Entertainment;Music & Video', 'Entertainment;Brain Games',
       'Entertainment;Creativity', 'Events', 'Finance', 'Food & Drink',
       'Health & Fitness', 'House & Home', 'Libraries & Demo',
       'Lifestyle', 'Lifestyle;Pretend Play',
       'Adventure;Action & Adventure', 'Arcade', 'Casual', 'Card',
       'Casual;Pretend Play', 'Action', 'Strategy', 'Puzzle', 'Sports',
       'Music', 'Word', 'Racing', 'Casual;Creativity',
       'Casual;Action & Adventure', 'Simulation', 'Adventure', 'Board',
       'Trivia', 'Role 

In [262]:
app_clean['Genres'].replace('Februaru 11, 2018', np.nan, inplace = True)

In [263]:
app_clean['Last Updated'].sample(10)

9347        March 30, 2017
3891     December 30, 2017
3487         June 18, 2018
4297           May 8, 2017
1580         June 30, 2016
10058      August 28, 2014
3666         July 25, 2018
9401          May 24, 2018
4930        August 1, 2018
6455         July 12, 2018
Name: Last Updated, dtype: object

In [264]:
app_clean['Last Updated'].replace('1.00.19', np.nan, inplace = True)

In [265]:
# Cleaning the Last Updated column.
app_clean['Last Updated'] = app_clean['Last Updated'].astype('str')
app_clean['Updated Year'] = [d.split(',')[-1] for d in app_clean['Last Updated']]
app_clean['x'] = [d.split(',')[0] for d in app_clean['Last Updated']]
app_clean['Updated Month'] =  [d.split(' ')[0] for d in app_clean['x']]
app_clean['Updated Date'] =  [d.split(' ')[-1] for d in app_clean['x']]
app_clean.drop(['x','Last Updated'], axis=1, inplace=True)

In [266]:
# Converting to numeric Data Type.
app_clean['Price'] = pd.to_numeric(app_clean['Price'], errors='coerce')
app_clean['Reviews'] = pd.to_numeric(app_clean['Reviews'], errors='coerce')
app_clean['Updated Year'] = pd.to_numeric(app_clean['Updated Year'], errors='coerce')
app_clean['Updated Date'] = pd.to_numeric(app_clean['Updated Date'], errors='coerce')

In [267]:
# Top 5 Apps with highest Number of Reviews.
app_clean[app_clean['Reviews']>5000000].head(5)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Current Ver,Android Ver,Updated Year,Updated Month,Updated Date
335,Messenger – Text and Video Chat for Free,COMMUNICATION,4.0,56642847,,1000000000.0,Free,0.0,Everyone,Communication,Varies with device,Varies with device,2018.0,August,1.0
336,WhatsApp Messenger,COMMUNICATION,4.4,69119316,,1000000000.0,Free,0.0,Everyone,Communication,Varies with device,Varies with device,2018.0,August,3.0
338,Google Chrome: Fast & Secure,COMMUNICATION,4.3,9642995,,1000000000.0,Free,0.0,Everyone,Communication,Varies with device,Varies with device,2018.0,August,1.0
342,Viber Messenger,COMMUNICATION,4.3,11334799,,500000000.0,Free,0.0,Everyone,Communication,Varies with device,Varies with device,2018.0,July,18.0
351,Opera Mini - fast web browser,COMMUNICATION,4.5,5149854,,100000000.0,Free,0.0,Everyone,Communication,Varies with device,Varies with device,2018.0,July,19.0


In [268]:
# Dropping Unwanted Columns.
app_clean.drop(['App', 'Genres', 'Current Ver', 'Android Ver', 'Content Rating', 'Updated Month', 'Updated Date'], axis=1, inplace=True)

In [269]:
# filling NaNs in Numerical Data.
l = ['Size', 'Updated Year']
for i in l:
    app_clean[i] = app_clean[i].fillna(app_clean[i].median())

In [270]:
#Category with highest number of apps
app_clean['Category'].value_counts().head(1)

FAMILY    1833
Name: Category, dtype: int64

* Category 'Family' has the highest number of apps (1608). 

In [271]:
# Dropping Unwanted Rows of Rating column.
app_clean.dropna(axis=0, inplace=True, subset=['Rating'])

In [272]:
# Creating Rating_cat column.
indx = 0
app_clean['Rating_cat'] = np.zeros((app_clean.shape[0],1))
for i in app_clean['Rating']:
    if i > 3.5:
        app_clean.iloc[indx, -1] = 'High'
    else:
        app_clean.iloc[indx, -1] = 'Low'
    indx = indx + 1

In [273]:
# App with the highest Price. 
app_clean[app_clean['Price'] == max(app_clean['Price'])]

Unnamed: 0,Category,Rating,Reviews,Size,Installs,Type,Price,Updated Year,Rating_cat
4367,LIFESTYLE,3.6,275,7475.2,10000.0,Paid,400.0,2018.0,High


In [274]:
# Dropping Unwanted Rows
app_clean.dropna(axis=0, inplace=True)

In [275]:
app_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8196 entries, 0 to 10840
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Category      8196 non-null   object 
 1   Rating        8196 non-null   float64
 2   Reviews       8196 non-null   int64  
 3   Size          8196 non-null   float64
 4   Installs      8196 non-null   float64
 5   Type          8196 non-null   object 
 6   Price         8196 non-null   float64
 7   Updated Year  8196 non-null   float64
 8   Rating_cat    8196 non-null   object 
dtypes: float64(5), int64(1), object(3)
memory usage: 640.3+ KB


In [276]:
# One hot Encoding of Categorical Columns.
final_app = pd.get_dummies(app_clean, columns=[ 'Type', 'Rating_cat'], drop_first=True)
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
final_app['Installs'] = le.fit_transform(app_clean['Installs'])
le1 = LabelEncoder()
final_app['Category'] = le.fit_transform(app_clean['Category'])

In [277]:
final_app.sample(8)

Unnamed: 0,Category,Rating,Reviews,Size,Installs,Price,Updated Year,Type_Paid,Rating_cat_Low
10738,14,3.8,196,11264.0,7,0.0,2014.0,0,0
7495,24,3.6,100,2560.0,8,0.0,2018.0,0,0
7320,11,5.0,7,2355.2,6,0.0,2018.0,0,0
2170,11,4.8,97,3584.0,6,2.99,2018.0,1,0
9055,20,4.8,6,16384.0,5,0.0,2018.0,0,0
589,7,4.4,218,27648.0,8,0.0,2018.0,0,0
4190,18,4.9,145,49152.0,7,1.99,2016.0,1,0
446,6,4.2,15287,17408.0,12,0.0,2018.0,0,0


In [278]:
final_app.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8196 entries, 0 to 10840
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Category        8196 non-null   int32  
 1   Rating          8196 non-null   float64
 2   Reviews         8196 non-null   int64  
 3   Size            8196 non-null   float64
 4   Installs        8196 non-null   int64  
 5   Price           8196 non-null   float64
 6   Updated Year    8196 non-null   float64
 7   Type_Paid       8196 non-null   uint8  
 8   Rating_cat_Low  8196 non-null   uint8  
dtypes: float64(4), int32(1), int64(2), uint8(2)
memory usage: 496.2 KB


# Data Preperation

In [279]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.ensemble import StackingClassifier, VotingClassifier
from sklearn.metrics import confusion_matrix, classification_report

In [280]:
x = final_app.drop(['Rating_cat_Low', 'Rating'], axis=1)
y = final_app['Rating_cat_Low']
Xtrain, Xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3, stratify = y, random_state = 10)

In [281]:
def evaluate(model):
    model.fit(Xtrain, ytrain)
    pred = model.predict(Xtest)
    cval = cross_val_score(model, x, y, cv=5)
    print("Training Accuracy:", model.score(Xtrain, ytrain))
    print("Testing Accuracy:", model.score(Xtest, ytest))
    print('Bias Error', cval.mean())
    print("Variance Error", cval.std())
    print(confusion_matrix(ytest, pred))
    print(classification_report(ytest, pred))

# Model Building

## Decision Tree.

In [282]:
dt = DecisionTreeClassifier(criterion='entropy', random_state=10)
evaluate(dt)

Training Accuracy: 0.9998256928708384
Testing Accuracy: 0.8255388369255795
Bias Error 0.8040459679459516
Variance Error 0.020409567168465995
[[1972  225]
 [ 204   58]]
              precision    recall  f1-score   support

           0       0.91      0.90      0.90      2197
           1       0.20      0.22      0.21       262

    accuracy                           0.83      2459
   macro avg       0.56      0.56      0.56      2459
weighted avg       0.83      0.83      0.83      2459



In [283]:
feature_importances = pd.DataFrame({'features': Xtrain.columns, 'Importance': dt.feature_importances_})
feature_importances

Unnamed: 0,features,Importance
0,Category,0.14713
1,Reviews,0.44264
2,Size,0.24543
3,Installs,0.069857
4,Price,0.015163
5,Updated Year,0.070031
6,Type_Paid,0.009749


## Bagging Classifier

In [284]:
bg = BaggingClassifier(base_estimator=dt, n_estimators=30, random_state=10)
evaluate(bg)

Training Accuracy: 0.9982569287083842
Testing Accuracy: 0.8812525416836112
Bias Error 0.8740830964746499
Variance Error 0.009649385952758162
[[2148   49]
 [ 243   19]]
              precision    recall  f1-score   support

           0       0.90      0.98      0.94      2197
           1       0.28      0.07      0.12       262

    accuracy                           0.88      2459
   macro avg       0.59      0.53      0.53      2459
weighted avg       0.83      0.88      0.85      2459



## Random Forest

In [285]:
rf = RandomForestClassifier(n_estimators=50, random_state=10)
evaluate(rf)

Training Accuracy: 0.9996513857416769
Testing Accuracy: 0.8865392435949573
Bias Error 0.8771339603267906
Variance Error 0.0071513168505149
[[2167   30]
 [ 249   13]]
              precision    recall  f1-score   support

           0       0.90      0.99      0.94      2197
           1       0.30      0.05      0.09       262

    accuracy                           0.89      2459
   macro avg       0.60      0.52      0.51      2459
weighted avg       0.83      0.89      0.85      2459



## Gradient Boosting

In [286]:
gb = GradientBoostingClassifier(learning_rate=0.1, n_estimators=70, random_state=10)
evaluate(gb)

Training Accuracy: 0.8961129510196967
Testing Accuracy: 0.8934526230174867
Bias Error 0.8890921739906844
Variance Error 0.0038682924859628825
[[2195    2]
 [ 260    2]]
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      2197
           1       0.50      0.01      0.02       262

    accuracy                           0.89      2459
   macro avg       0.70      0.50      0.48      2459
weighted avg       0.85      0.89      0.84      2459



## AdaBoost

In [287]:
ab = AdaBoostClassifier(base_estimator= DecisionTreeClassifier(max_depth=3, random_state=10), 
                        learning_rate=0.1,n_estimators = 70 ,random_state=10 )
evaluate(ab)

Training Accuracy: 0.8983789436987972
Testing Accuracy: 0.8906059373729158
Bias Error 0.8904341582464026
Variance Error 0.0030356009757154397
[[2189    8]
 [ 261    1]]
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      2197
           1       0.11      0.00      0.01       262

    accuracy                           0.89      2459
   macro avg       0.50      0.50      0.47      2459
weighted avg       0.81      0.89      0.84      2459



## VotingClassifier

In [288]:
vc = VotingClassifier([('ab', ab), ('gb', gb), ('bg', bg), ('rf', rf)], voting='soft')
evaluate(vc)

Training Accuracy: 0.9604322816803207
Testing Accuracy: 0.8914192761285076
Bias Error 0.8887255018675873
Variance Error 0.005821479726985801
[[2187   10]
 [ 257    5]]
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      2197
           1       0.33      0.02      0.04       262

    accuracy                           0.89      2459
   macro avg       0.61      0.51      0.49      2459
weighted avg       0.84      0.89      0.85      2459



In [289]:
vc = VotingClassifier([('ab', ab), ('gb', gb), ('bg', bg), ('rf', rf)], voting='hard')
evaluate(vc)

Training Accuracy: 0.9001220149904131
Testing Accuracy: 0.8934526230174867
Bias Error 0.8911661631869523
Variance Error 0.0020768517479941026
[[2195    2]
 [ 260    2]]
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      2197
           1       0.50      0.01      0.02       262

    accuracy                           0.89      2459
   macro avg       0.70      0.50      0.48      2459
weighted avg       0.85      0.89      0.84      2459



In [290]:
sc = StackingClassifier([('ab', ab), ('gb', gb), ('bg', bg), ('rf', rf)])
evaluate(sc)

Training Accuracy: 0.9097089070943002
Testing Accuracy: 0.8922326148840992
Bias Error 0.8932405988184348
Variance Error 2.604205419722838e-05
[[2192    5]
 [ 260    2]]
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      2197
           1       0.29      0.01      0.01       262

    accuracy                           0.89      2459
   macro avg       0.59      0.50      0.48      2459
weighted avg       0.83      0.89      0.84      2459



In [291]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
evaluate(lr)

Training Accuracy: 0.8933240369531114
Testing Accuracy: 0.8934526230174867
Bias Error 0.8933626244438161
Variance Error 0.00023887528887684645
[[2197    0]
 [ 262    0]]
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      2197
           1       0.00      0.00      0.00       262

    accuracy                           0.89      2459
   macro avg       0.45      0.50      0.47      2459
weighted avg       0.80      0.89      0.84      2459



  _warn_prf(average, modifier, msg_start, len(result))


## Conslusions

* All the Ensembeles models have an accuracy around 87%-89% compared to Decision Tree's 80%. 
* The Confusion matrix and Classification report suggest that the models are predicting the dominant Class 0 (High Rating) more accurately than the Class 1 (Low Rating).
* Logitstic Regression could not make the predictions for the minority class.
* The class imbalance is hindering the accuracies and model is able to identify the dominant class. Classes must be balanced to achieve greater Precision and Recall.