In [1821]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import csv

## 1. Dataset 1 - Kenya_airways_flights

In [1822]:
import pandas as pd

def inspect_dataframe(df):
    """
    This function takes a pandas DataFrame as input and prints the following:
    1. Info about the DataFrame (including data types and non-null counts)
    2. Shape of the DataFrame (number of rows and columns)
    3. Count of missing values per column
    4. Count of duplicate rows
    """
    # 1. Info about the DataFrame
    print("---- DataFrame Info ----")
    df.info()
    print("\n")
    
    # 2. Shape of the DataFrame
    print("---- DataFrame Shape ----")
    print(f"Rows: {df.shape[0]}, Columns: {df.shape[1]}")
    print("\n")
    
    # 3. Missing values per column
    print("---- Missing Values ----")
    missing_values = df.isnull().sum()
    print(missing_values[missing_values > 0])
    print("\n")
    
    # 4. Count of duplicate rows
    print("---- Duplicate Rows ----")
    duplicate_count = df.duplicated().sum()
    print(f"Number of duplicate rows: {duplicate_count}")
    print("\n")

# Example usage:
df = pd.read_csv("kenya_airways_flights (1).csv", delimiter=',', on_bad_lines='skip', encoding='utf-8')
inspect_dataframe(df)


---- DataFrame Info ----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2362 entries, 0 to 2361
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Title           2362 non-null   object
 1   Image           2239 non-null   object
 2   Avatar_URL      2353 non-null   object
 3   crvsd           2352 non-null   object
 4   ui_header_link  2351 non-null   object
 5   default         1855 non-null   object
 6   phmbo           2352 non-null   object
 7   phmbo1          2110 non-null   object
 8   dmrsr           2350 non-null   object
 9   dmrsr2          2350 non-null   object
 10  dmrsr3          2348 non-null   object
 11  qwuub_URL       2352 non-null   object
 12  qwuub           2352 non-null   object
 13  tehyy           2213 non-null   object
 14  xcjrc           2352 non-null   object
 15  Rating          2352 non-null   object
dtypes: object(16)
memory usage: 295.4+ KB


---- DataFrame Shape ----
Rows:

In [1823]:
df = pd.read_csv("kenya_airways_flights (1).csv", delimiter=',', on_bad_lines='skip', encoding='utf-8')
df.head(1)

Unnamed: 0,Title,Image,Avatar_URL,crvsd,ui_header_link,default,phmbo,phmbo1,dmrsr,dmrsr2,dmrsr3,qwuub_URL,qwuub,tehyy,xcjrc,Rating
0,Recently I travelled for business from Cape To...,https://media-cdn.tripadvisor.com/media/photo-...,https://www.tripadvisor.com/Profile/dewaldr,Dewald R wrote a review Nov 2022,Dewald R,"Cape Town Central, South Africa",2336 contributions,198 helpful votes,Cape Town Central - Entebbe,Africa,Economy,https://www.tripadvisor.com/ShowUserReviews-g1...,Cape Town to Uganda via Nairobi and Back,Date of travel: September 2022,This review is the subjective opinion of a Tri...,ui_bubble_rating bubble_40


In [1824]:
# Dropping the duplicate rows
df = df.drop_duplicates()
df.reset_index(drop=True, inplace=True)

df

Unnamed: 0,Title,Image,Avatar_URL,crvsd,ui_header_link,default,phmbo,phmbo1,dmrsr,dmrsr2,dmrsr3,qwuub_URL,qwuub,tehyy,xcjrc,Rating
0,Recently I travelled for business from Cape To...,https://media-cdn.tripadvisor.com/media/photo-...,https://www.tripadvisor.com/Profile/dewaldr,Dewald R wrote a review Nov 2022,Dewald R,"Cape Town Central, South Africa",2336 contributions,198 helpful votes,Cape Town Central - Entebbe,Africa,Economy,https://www.tripadvisor.com/ShowUserReviews-g1...,Cape Town to Uganda via Nairobi and Back,Date of travel: September 2022,This review is the subjective opinion of a Tri...,ui_bubble_rating bubble_40
1,I want to thanks to Jane for her excellent ser...,https://media-cdn.tripadvisor.com/media/photo-...,https://www.tripadvisor.com/Profile/Alien555,Jose wrote a review Nov 2022,Jose,,1 contribution,,New York City - Cape Town Central,International,Economy,https://www.tripadvisor.com/ShowUserReviews-g1...,I wanted to try becuase I read the reviews but...,Date of travel: November 2022,This review is the subjective opinion of a Tri...,ui_bubble_rating bubble_50
2,not a bad airline to fly with could do with a ...,https://media-cdn.tripadvisor.com/media/photo-...,https://www.tripadvisor.com/Profile/eddiepB8777IQ,Eddie P wrote a review Nov 2022,Eddie P,"Cork, Ireland",1 contribution,,London - Nairobi,International,Economy,https://www.tripadvisor.com/ShowUserReviews-g1...,kenya airways review,Date of travel: May 2022,This review is the subjective opinion of a Tri...,ui_bubble_rating bubble_40
3,"Dear Kenya Airways ,\n\nI am on my way to Zanz...",https://media-cdn.tripadvisor.com/media/photo-...,https://www.tripadvisor.com/Profile/VirginieAn,Virg A wrote a review Nov 2022,Virg A,,6 contributions,,Bangui - Zanzibar Island,Africa,Business Class,https://www.tripadvisor.com/ShowUserReviews-g1...,Horrible experience,Date of travel: November 2022,This review is the subjective opinion of a Tri...,ui_bubble_rating bubble_10
4,"Absolutely appalling airline, never ever use i...",https://media-cdn.tripadvisor.com/media/photo-...,https://www.tripadvisor.com/Profile/H5647LLjasonm,Jason M wrote a review Nov 2022,Jason M,"London, United Kingdom",22 contributions,6 helpful votes,Entebbe - Lilongwe,Africa,Economy,https://www.tripadvisor.com/ShowUserReviews-g1...,Avoid this airline like a bargepole,Date of travel: November 2022,This review is the subjective opinion of a Tri...,ui_bubble_rating bubble_10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2340,The flight was a good one. Staff are friendly ...,https://media-cdn.tripadvisor.com/media/photo-...,https://www.tripadvisor.com/Profile/973christine,973christine wrote a review Jan 2016,973christine,"Nairobi, Kenya",40 contributions,8 helpful votes,Nairobi - Zanzibar Island,Africa,Economy,https://www.tripadvisor.com/ShowUserReviews-g1...,Good service,Date of travel: October 2015,This review is the subjective opinion of a Tri...,ui_bubble_rating bubble_40
2341,It was an overnight flight so we managed to sl...,https://media-cdn.tripadvisor.com/media/photo-...,https://www.tripadvisor.com/Profile/548pinam,Pina M wrote a review Jan 2016,Pina M,"London, United Kingdom",138 contributions,41 helpful votes,London - Victoria,International,Economy,https://www.tripadvisor.com/ShowUserReviews-g1...,The staff is outstanding,Date of travel: November 2015,This review is the subjective opinion of a Tri...,ui_bubble_rating bubble_40
2342,Kenya Airways is by far the most acceptable fo...,https://media-cdn.tripadvisor.com/media/photo-...,https://www.tripadvisor.com/Profile/147azhark,Azhar K wrote a review Jan 2016,Azhar K,"Watamu, Coast, Kenya",74 contributions,21 helpful votes,Mombasa - Dubai,International,Economy,https://www.tripadvisor.com/ShowUserReviews-g1...,The best in Africa,Date of travel: November 2015,This review is the subjective opinion of a Tri...,ui_bubble_rating bubble_40
2343,During our trip to South Africa we traveled fo...,https://media-cdn.tripadvisor.com/media/photo-...,https://www.tripadvisor.com/Profile/Ash_paul,Ash_paul wrote a review Jan 2016,Ash_paul,Kolkata,56 contributions,28 helpful votes,Mumbai - Nairobi,International,Economy,https://www.tripadvisor.com/ShowUserReviews-g1...,Inconsistent service,Date of travel: January 2016,This review is the subjective opinion of a Tri...,ui_bubble_rating bubble_30


In [1825]:
df1 = df.drop(columns= ['Image', 'Avatar_URL', 'crvsd', 'ui_header_link', 'default', 'phmbo', 'phmbo1', 'dmrsr', 'qwuub_URL', 'xcjrc'])
df1

Unnamed: 0,Title,dmrsr2,dmrsr3,qwuub,tehyy,Rating
0,Recently I travelled for business from Cape To...,Africa,Economy,Cape Town to Uganda via Nairobi and Back,Date of travel: September 2022,ui_bubble_rating bubble_40
1,I want to thanks to Jane for her excellent ser...,International,Economy,I wanted to try becuase I read the reviews but...,Date of travel: November 2022,ui_bubble_rating bubble_50
2,not a bad airline to fly with could do with a ...,International,Economy,kenya airways review,Date of travel: May 2022,ui_bubble_rating bubble_40
3,"Dear Kenya Airways ,\n\nI am on my way to Zanz...",Africa,Business Class,Horrible experience,Date of travel: November 2022,ui_bubble_rating bubble_10
4,"Absolutely appalling airline, never ever use i...",Africa,Economy,Avoid this airline like a bargepole,Date of travel: November 2022,ui_bubble_rating bubble_10
...,...,...,...,...,...,...
2340,The flight was a good one. Staff are friendly ...,Africa,Economy,Good service,Date of travel: October 2015,ui_bubble_rating bubble_40
2341,It was an overnight flight so we managed to sl...,International,Economy,The staff is outstanding,Date of travel: November 2015,ui_bubble_rating bubble_40
2342,Kenya Airways is by far the most acceptable fo...,International,Economy,The best in Africa,Date of travel: November 2015,ui_bubble_rating bubble_40
2343,During our trip to South Africa we traveled fo...,International,Economy,Inconsistent service,Date of travel: January 2016,ui_bubble_rating bubble_30


In [1826]:
df1['Review'] = df1['Title'] + ' ' + df1['qwuub']
df1 = df1.drop(columns= ['Title', 'qwuub'])
df1.head(5)


Unnamed: 0,dmrsr2,dmrsr3,tehyy,Rating,Review
0,Africa,Economy,Date of travel: September 2022,ui_bubble_rating bubble_40,Recently I travelled for business from Cape To...
1,International,Economy,Date of travel: November 2022,ui_bubble_rating bubble_50,I want to thanks to Jane for her excellent ser...
2,International,Economy,Date of travel: May 2022,ui_bubble_rating bubble_40,not a bad airline to fly with could do with a ...
3,Africa,Business Class,Date of travel: November 2022,ui_bubble_rating bubble_10,"Dear Kenya Airways ,\n\nI am on my way to Zanz..."
4,Africa,Economy,Date of travel: November 2022,ui_bubble_rating bubble_10,"Absolutely appalling airline, never ever use i..."


In [1827]:
df1.rename(columns={'dmrsr2': 'Departures', 'dmrsr3': 'Class', 'tehyy': 'Date'}, inplace=True)
df1.head(5)

Unnamed: 0,Departures,Class,Date,Rating,Review
0,Africa,Economy,Date of travel: September 2022,ui_bubble_rating bubble_40,Recently I travelled for business from Cape To...
1,International,Economy,Date of travel: November 2022,ui_bubble_rating bubble_50,I want to thanks to Jane for her excellent ser...
2,International,Economy,Date of travel: May 2022,ui_bubble_rating bubble_40,not a bad airline to fly with could do with a ...
3,Africa,Business Class,Date of travel: November 2022,ui_bubble_rating bubble_10,"Dear Kenya Airways ,\n\nI am on my way to Zanz..."
4,Africa,Economy,Date of travel: November 2022,ui_bubble_rating bubble_10,"Absolutely appalling airline, never ever use i..."


In [1828]:
missing_values = df1.isnull().sum()

# Display the number of missing values for each column
print(missing_values)

Departures     12
Class          14
Date          148
Rating         10
Review         10
dtype: int64


In [1829]:
# Fill missing values with a specific date
df1['Date'].fillna('2020-01-01', inplace=True)

df1.sample(5)

Unnamed: 0,Departures,Class,Date,Rating,Review
2102,Africa,Economy,Date of travel: September 2016,ui_bubble_rating bubble_30,Travelled with Kenya airways. My flight to Ken...
1430,International,Economy,Date of travel: April 2018,ui_bubble_rating bubble_30,Food service was pretty poor- about half of th...
1541,Africa,Economy,Date of travel: January 2018,ui_bubble_rating bubble_40,Friendly staff made this early morning flight ...
1050,Africa,Economy,2020-01-01,ui_bubble_rating bubble_40,We took this trip (return) at a reasonable pri...
1927,Africa,Economy,Date of travel: March 2017,ui_bubble_rating bubble_10,Our baggage was delayed on a flight from Kenya...


In [1830]:
# Strip 'Date of travel: ' from the Date column
df1['Date'] = df1['Date'].str.replace('Date of travel: ', '');

df1.head(5)

Unnamed: 0,Departures,Class,Date,Rating,Review
0,Africa,Economy,September 2022,ui_bubble_rating bubble_40,Recently I travelled for business from Cape To...
1,International,Economy,November 2022,ui_bubble_rating bubble_50,I want to thanks to Jane for her excellent ser...
2,International,Economy,May 2022,ui_bubble_rating bubble_40,not a bad airline to fly with could do with a ...
3,Africa,Business Class,November 2022,ui_bubble_rating bubble_10,"Dear Kenya Airways ,\n\nI am on my way to Zanz..."
4,Africa,Economy,November 2022,ui_bubble_rating bubble_10,"Absolutely appalling airline, never ever use i..."


In [1831]:
# Convert to datetime, invalid parsing will be set as NaT
df1['Date3'] = pd.to_datetime(df1['Date'], errors='coerce')

# Display DataFrame with converted timestamps
df1

  df1['Date3'] = pd.to_datetime(df1['Date'], errors='coerce')


Unnamed: 0,Departures,Class,Date,Rating,Review,Date3
0,Africa,Economy,September 2022,ui_bubble_rating bubble_40,Recently I travelled for business from Cape To...,2022-09-01
1,International,Economy,November 2022,ui_bubble_rating bubble_50,I want to thanks to Jane for her excellent ser...,2022-11-01
2,International,Economy,May 2022,ui_bubble_rating bubble_40,not a bad airline to fly with could do with a ...,2022-05-01
3,Africa,Business Class,November 2022,ui_bubble_rating bubble_10,"Dear Kenya Airways ,\n\nI am on my way to Zanz...",2022-11-01
4,Africa,Economy,November 2022,ui_bubble_rating bubble_10,"Absolutely appalling airline, never ever use i...",2022-11-01
...,...,...,...,...,...,...
2340,Africa,Economy,October 2015,ui_bubble_rating bubble_40,The flight was a good one. Staff are friendly ...,2015-10-01
2341,International,Economy,November 2015,ui_bubble_rating bubble_40,It was an overnight flight so we managed to sl...,2015-11-01
2342,International,Economy,November 2015,ui_bubble_rating bubble_40,Kenya Airways is by far the most acceptable fo...,2015-11-01
2343,International,Economy,January 2016,ui_bubble_rating bubble_30,During our trip to South Africa we traveled fo...,2016-01-01


In [1832]:
# Set the 'Date' column as the index
df1.set_index('Date3', inplace=True)
df1

Unnamed: 0_level_0,Departures,Class,Date,Rating,Review
Date3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-09-01,Africa,Economy,September 2022,ui_bubble_rating bubble_40,Recently I travelled for business from Cape To...
2022-11-01,International,Economy,November 2022,ui_bubble_rating bubble_50,I want to thanks to Jane for her excellent ser...
2022-05-01,International,Economy,May 2022,ui_bubble_rating bubble_40,not a bad airline to fly with could do with a ...
2022-11-01,Africa,Business Class,November 2022,ui_bubble_rating bubble_10,"Dear Kenya Airways ,\n\nI am on my way to Zanz..."
2022-11-01,Africa,Economy,November 2022,ui_bubble_rating bubble_10,"Absolutely appalling airline, never ever use i..."
...,...,...,...,...,...
2015-10-01,Africa,Economy,October 2015,ui_bubble_rating bubble_40,The flight was a good one. Staff are friendly ...
2015-11-01,International,Economy,November 2015,ui_bubble_rating bubble_40,It was an overnight flight so we managed to sl...
2015-11-01,International,Economy,November 2015,ui_bubble_rating bubble_40,Kenya Airways is by far the most acceptable fo...
2016-01-01,International,Economy,January 2016,ui_bubble_rating bubble_30,During our trip to South Africa we traveled fo...


In [1833]:
# confirm missing values
missing_values = df1.isnull().sum()

# Display the number of missing values for each column
print(missing_values)

Departures    12
Class         14
Date           0
Rating        10
Review        10
dtype: int64


In [1834]:
# Define placeholders for each column
placeholders = {
    'Departures': 0,
    'Class': 'Unknown',
    'Rating': 0,
    'Review': 'No Review'
}

# Replace missing values with respective placeholders
Cleaned_df1 = df1.fillna(value=placeholders)

missing_values = Cleaned_df1.isnull().sum()

# Display the number of missing values for each column
print(missing_values)


Departures    0
Class         0
Date          0
Rating        0
Review        0
dtype: int64


In [1835]:
Cleaned_df1.head()

Unnamed: 0_level_0,Departures,Class,Date,Rating,Review
Date3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-09-01,Africa,Economy,September 2022,ui_bubble_rating bubble_40,Recently I travelled for business from Cape To...
2022-11-01,International,Economy,November 2022,ui_bubble_rating bubble_50,I want to thanks to Jane for her excellent ser...
2022-05-01,International,Economy,May 2022,ui_bubble_rating bubble_40,not a bad airline to fly with could do with a ...
2022-11-01,Africa,Business Class,November 2022,ui_bubble_rating bubble_10,"Dear Kenya Airways ,\n\nI am on my way to Zanz..."
2022-11-01,Africa,Economy,November 2022,ui_bubble_rating bubble_10,"Absolutely appalling airline, never ever use i..."


In [1836]:

# Extract numeric rating from the 'Rating' column
Cleaned_df1['Rating_Value'] = Cleaned_df1['Rating'].str.extract('bubble_(\d+)')

# Convert the extracted values to numeric, replacing non-numeric values with NaN
Cleaned_df1['Rating_Value'] = pd.to_numeric(Cleaned_df1['Rating_Value'], errors='coerce')

# Scale the ratings from 20, 30, etc., to 2.0, 3.0, etc.
Cleaned_df1['Rating_Value'] = Cleaned_df1['Rating_Value'] / 10

# Optional: Update the rating description to fit the new scale
def rating_description(value):
    if pd.isna(value):
        return 'Unknown'
    if value >= 5.0:
        return 'Excellent'
    elif value >= 4.0:
        return 'Good'
    elif value >= 3.0:
        return 'Average'
    else:
        return 'Poor'

Cleaned_df1['Rating_Description'] = Cleaned_df1['Rating_Value'].apply(rating_description)

# Display the updated DataFrame
Cleaned_df1

Unnamed: 0_level_0,Departures,Class,Date,Rating,Review,Rating_Value,Rating_Description
Date3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-09-01,Africa,Economy,September 2022,ui_bubble_rating bubble_40,Recently I travelled for business from Cape To...,4.0,Good
2022-11-01,International,Economy,November 2022,ui_bubble_rating bubble_50,I want to thanks to Jane for her excellent ser...,5.0,Excellent
2022-05-01,International,Economy,May 2022,ui_bubble_rating bubble_40,not a bad airline to fly with could do with a ...,4.0,Good
2022-11-01,Africa,Business Class,November 2022,ui_bubble_rating bubble_10,"Dear Kenya Airways ,\n\nI am on my way to Zanz...",1.0,Poor
2022-11-01,Africa,Economy,November 2022,ui_bubble_rating bubble_10,"Absolutely appalling airline, never ever use i...",1.0,Poor
...,...,...,...,...,...,...,...
2015-10-01,Africa,Economy,October 2015,ui_bubble_rating bubble_40,The flight was a good one. Staff are friendly ...,4.0,Good
2015-11-01,International,Economy,November 2015,ui_bubble_rating bubble_40,It was an overnight flight so we managed to sl...,4.0,Good
2015-11-01,International,Economy,November 2015,ui_bubble_rating bubble_40,Kenya Airways is by far the most acceptable fo...,4.0,Good
2016-01-01,International,Economy,January 2016,ui_bubble_rating bubble_30,During our trip to South Africa we traveled fo...,3.0,Average


In [1837]:
# Drop the original 'Rating' column 
Cleaned_df1 = Cleaned_df1.drop(columns=['Rating', 'Date'])   
Cleaned_df1

Unnamed: 0_level_0,Departures,Class,Review,Rating_Value,Rating_Description
Date3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-09-01,Africa,Economy,Recently I travelled for business from Cape To...,4.0,Good
2022-11-01,International,Economy,I want to thanks to Jane for her excellent ser...,5.0,Excellent
2022-05-01,International,Economy,not a bad airline to fly with could do with a ...,4.0,Good
2022-11-01,Africa,Business Class,"Dear Kenya Airways ,\n\nI am on my way to Zanz...",1.0,Poor
2022-11-01,Africa,Economy,"Absolutely appalling airline, never ever use i...",1.0,Poor
...,...,...,...,...,...
2015-10-01,Africa,Economy,The flight was a good one. Staff are friendly ...,4.0,Good
2015-11-01,International,Economy,It was an overnight flight so we managed to sl...,4.0,Good
2015-11-01,International,Economy,Kenya Airways is by far the most acceptable fo...,4.0,Good
2016-01-01,International,Economy,During our trip to South Africa we traveled fo...,3.0,Average


##  Dataset 2 - Kenya_airways_reviews

In [1838]:
df2 = pd.read_csv('kenya_airways_reviews.csv')
df2

Unnamed: 0,Date,Review
0,3rd August 2024,✅ Trip Verified | I have never seen such a dis...
1,2nd August 2024,Not Verified | Bumped off flight at boarding...
2,30th July 2024,✅ Trip Verified | I am disappointment with t...
3,25th July 2024,✅ Trip Verified | This airline is becoming mo...
4,24th July 2024,✅ Trip Verified | 3 out of 4 flights delayed ...
...,...,...
473,18th July 2011,JNB-NBO-JNB. This trip showed both the good an...
474,17th July 2011,Nairobi-London. Lounge in Nairobi was small an...
475,11th July 2011,Nairobi-London. As me and my wife are both whe...
476,6th July 2011,LUN-LLW in Economy. Adequate for a 1 hr trip. ...


In [1839]:
inspect_dataframe(df2)

---- DataFrame Info ----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 478 entries, 0 to 477
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    478 non-null    object
 1   Review  478 non-null    object
dtypes: object(2)
memory usage: 7.6+ KB


---- DataFrame Shape ----
Rows: 478, Columns: 2


---- Missing Values ----
Series([], dtype: int64)


---- Duplicate Rows ----
Number of duplicate rows: 1




In [1840]:
# Dropping the duplicate rows
df2 = df2.drop_duplicates()
df2.reset_index(drop=True, inplace=True)

df2

Unnamed: 0,Date,Review
0,3rd August 2024,✅ Trip Verified | I have never seen such a dis...
1,2nd August 2024,Not Verified | Bumped off flight at boarding...
2,30th July 2024,✅ Trip Verified | I am disappointment with t...
3,25th July 2024,✅ Trip Verified | This airline is becoming mo...
4,24th July 2024,✅ Trip Verified | 3 out of 4 flights delayed ...
...,...,...
472,18th July 2011,JNB-NBO-JNB. This trip showed both the good an...
473,17th July 2011,Nairobi-London. Lounge in Nairobi was small an...
474,11th July 2011,Nairobi-London. As me and my wife are both whe...
475,6th July 2011,LUN-LLW in Economy. Adequate for a 1 hr trip. ...


In [1841]:
df2.sample(10)

Unnamed: 0,Date,Review
330,4th December 2014,Recently flew from LHR to Nairobi on to Kilima...
9,27th June 2024,✅ Trip Verified | One of the worst flights i ...
349,2nd June 2014,Travelled on KQ from Nairobi to Bangkok in eco...
324,14th January 2015,Flew from Paris via Amsterdam to Nairobi. The ...
394,9th October 2012,I found the flight horrible. My partners T.V d...
127,21st March 2021,✅ Trip Verified | This was not my first time f...
51,27th July 2023,✅ Trip Verified | Do not use Kenya Airways! Th...
46,22nd August 2023,Not Verified | Airline was 1 hour late to boar...
182,27th July 2019,Not Verified | New York to Dar es Saalam via ...
426,15th February 2012,NBO-AMS. Checked in on-line baggage drop very ...


In [1842]:
from datetime import datetime

# Function to remove ordinal suffixes
def remove_ordinal_suffix(date_str):
    return date_str.replace('st', '').replace('nd', '').replace('rd', '').replace('th', '')

# Apply the function to remove suffixes
df2['Date'] = df2['Date'].apply(remove_ordinal_suffix)

# Convert to datetime with errors='coerce'
df2['Date'] = pd.to_datetime(df2['Date'], format='%d %B %Y', errors='coerce')

df2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Date'] = df2['Date'].apply(remove_ordinal_suffix)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Date'] = pd.to_datetime(df2['Date'], format='%d %B %Y', errors='coerce')


Unnamed: 0,Date,Review
0,NaT,✅ Trip Verified | I have never seen such a dis...
1,NaT,Not Verified | Bumped off flight at boarding...
2,2024-07-30,✅ Trip Verified | I am disappointment with t...
3,2024-07-25,✅ Trip Verified | This airline is becoming mo...
4,2024-07-24,✅ Trip Verified | 3 out of 4 flights delayed ...
...,...,...
472,2011-07-18,JNB-NBO-JNB. This trip showed both the good an...
473,2011-07-17,Nairobi-London. Lounge in Nairobi was small an...
474,2011-07-11,Nairobi-London. As me and my wife are both whe...
475,2011-07-06,LUN-LLW in Economy. Adequate for a 1 hr trip. ...


In [1843]:
placeholder_date = pd.to_datetime('24.02.2024', format='%d.%m.%Y')

# Replace NaT values with the placeholder date
df2['Date3'] = df2['Date'].fillna(placeholder_date)

df2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Date3'] = df2['Date'].fillna(placeholder_date)


Unnamed: 0,Date,Review,Date3
0,NaT,✅ Trip Verified | I have never seen such a dis...,2024-02-24
1,NaT,Not Verified | Bumped off flight at boarding...,2024-02-24
2,2024-07-30,✅ Trip Verified | I am disappointment with t...,2024-07-30
3,2024-07-25,✅ Trip Verified | This airline is becoming mo...,2024-07-25
4,2024-07-24,✅ Trip Verified | 3 out of 4 flights delayed ...,2024-07-24
...,...,...,...
472,2011-07-18,JNB-NBO-JNB. This trip showed both the good an...,2011-07-18
473,2011-07-17,Nairobi-London. Lounge in Nairobi was small an...,2011-07-17
474,2011-07-11,Nairobi-London. As me and my wife are both whe...,2011-07-11
475,2011-07-06,LUN-LLW in Economy. Adequate for a 1 hr trip. ...,2011-07-06


In [1844]:
# Set the 'Date' column as the index
df2.set_index('Date3', inplace=True)

df2

Unnamed: 0_level_0,Date,Review
Date3,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-02-24,NaT,✅ Trip Verified | I have never seen such a dis...
2024-02-24,NaT,Not Verified | Bumped off flight at boarding...
2024-07-30,2024-07-30,✅ Trip Verified | I am disappointment with t...
2024-07-25,2024-07-25,✅ Trip Verified | This airline is becoming mo...
2024-07-24,2024-07-24,✅ Trip Verified | 3 out of 4 flights delayed ...
...,...,...
2011-07-18,2011-07-18,JNB-NBO-JNB. This trip showed both the good an...
2011-07-17,2011-07-17,Nairobi-London. Lounge in Nairobi was small an...
2011-07-11,2011-07-11,Nairobi-London. As me and my wife are both whe...
2011-07-06,2011-07-06,LUN-LLW in Economy. Adequate for a 1 hr trip. ...


In [1845]:
df2 = df2.drop('Date', axis=1)

df2

Unnamed: 0_level_0,Review
Date3,Unnamed: 1_level_1
2024-02-24,✅ Trip Verified | I have never seen such a dis...
2024-02-24,Not Verified | Bumped off flight at boarding...
2024-07-30,✅ Trip Verified | I am disappointment with t...
2024-07-25,✅ Trip Verified | This airline is becoming mo...
2024-07-24,✅ Trip Verified | 3 out of 4 flights delayed ...
...,...
2011-07-18,JNB-NBO-JNB. This trip showed both the good an...
2011-07-17,Nairobi-London. Lounge in Nairobi was small an...
2011-07-11,Nairobi-London. As me and my wife are both whe...
2011-07-06,LUN-LLW in Economy. Adequate for a 1 hr trip. ...


## Dataset 3 - KQ_Reviews

In [1846]:
df3 = pd.read_csv('KQ_Reviews.csv')
df3.sample(10)

Unnamed: 0,Heading,Body,Star Rating,Date
73,I would avoid this airline,I am strongly disappointed by this airline. Th...,,2023-01-25T08:39:05.000Z
205,"Uncaring, Unresponsive, Unimpressed!!!","For at least the last 5 days, their modify-boo...",1.0,2020-05-29T13:06:56.000Z
83,I travelled Business Class,"I travelled Business Class, it was dreadful. C...",,2022-02-26T06:48:21.000Z
55,I had to cancel my trip to Kenya (Kenya…,I had to cancel my trip to Kenya (Kenya Airway...,,2023-12-04T12:26:55.000Z
114,Used to be a fan not anymore,Used to be a fan not anymoreThey have lost my ...,1.0,2023-03-02T09:59:40.000Z
233,Kenya airways has arrogant staff and…,Kenya airways has arrogant staff and arrogant ...,2.0,2018-05-05T17:02:46.000Z
168,Incompetent customer service unable to…,Incompetent customer service unable to resolve...,,2021-10-24T21:31:12.000Z
58,Booked a ticket,"Booked a ticket, payed but never got the ticke...",,2023-03-31T11:30:49.000Z
126,Hi when was at Heathrow had confirmed…,Hi when was at Heathrow had confirmed all seat...,,2022-08-27T07:40:12.000Z
35,I’m so shocked they don’t have 5*…,I’m so shocked that they do not have 5* review...,,2023-04-07T13:04:45.000Z


In [1847]:
inspect_dataframe(df3)

---- DataFrame Info ----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Heading      244 non-null    object 
 1   Body         244 non-null    object 
 2   Star Rating  76 non-null     float64
 3   Date         244 non-null    object 
dtypes: float64(1), object(3)
memory usage: 7.8+ KB


---- DataFrame Shape ----
Rows: 244, Columns: 4


---- Missing Values ----
Star Rating    168
dtype: int64


---- Duplicate Rows ----
Number of duplicate rows: 1




In [1848]:
# Dropping the duplicate rows
df3 = df3.drop_duplicates()
df3.reset_index(drop=True, inplace=True)

df3

Unnamed: 0,Heading,Body,Star Rating,Date
0,Constantly canceling flights,Kenya airlines canceled my flight mid my journ...,,2024-08-02T17:39:56.000Z
1,Kenya Airways canceled the flight and…,Kenya Airways canceled the flight and never re...,,2024-08-04T20:05:08.000Z
2,If your thinking of Kenya Airlines...think aga...,If your thinking of using this airline...RUN d...,,2024-07-30T21:23:34.000Z
3,Worst airlines ever,"Worst airlines ever, not friendly staff, no fl...",,2024-07-29T23:47:06.000Z
4,We were trying to go to Nampula from Nairobi a...,We were trying to go from Europe through Nairo...,,2024-07-25T09:20:16.000Z
...,...,...,...,...
238,Better than I expected,Much better service than I had expected - espe...,,2017-01-26T17:27:53.000Z
239,Request for immediate action.,"I have never had a night mare, until I used Ke...",,2016-08-30T16:52:59.000Z
240,Never again,Flew with Kenya airways last year in October.U...,,2016-07-02T10:26:21.000Z
241,NEVER AGAIN - RUBISH COMPANY !!!,We travelled to Zanzibar in October 2015. From...,1.0,2015-11-21T22:19:18.081Z


In [1849]:
df3['Star Rating'] = df3['Star Rating'].fillna(0)
df3

Unnamed: 0,Heading,Body,Star Rating,Date
0,Constantly canceling flights,Kenya airlines canceled my flight mid my journ...,0.0,2024-08-02T17:39:56.000Z
1,Kenya Airways canceled the flight and…,Kenya Airways canceled the flight and never re...,0.0,2024-08-04T20:05:08.000Z
2,If your thinking of Kenya Airlines...think aga...,If your thinking of using this airline...RUN d...,0.0,2024-07-30T21:23:34.000Z
3,Worst airlines ever,"Worst airlines ever, not friendly staff, no fl...",0.0,2024-07-29T23:47:06.000Z
4,We were trying to go to Nampula from Nairobi a...,We were trying to go from Europe through Nairo...,0.0,2024-07-25T09:20:16.000Z
...,...,...,...,...
238,Better than I expected,Much better service than I had expected - espe...,0.0,2017-01-26T17:27:53.000Z
239,Request for immediate action.,"I have never had a night mare, until I used Ke...",0.0,2016-08-30T16:52:59.000Z
240,Never again,Flew with Kenya airways last year in October.U...,0.0,2016-07-02T10:26:21.000Z
241,NEVER AGAIN - RUBISH COMPANY !!!,We travelled to Zanzibar in October 2015. From...,1.0,2015-11-21T22:19:18.081Z


In [1850]:
# Convert the 'Date' column to datetime with errors='coerce'
df3['Date'] = pd.to_datetime(df3['Date'], errors='coerce')

# Combine day, month, and year into a single column in the format 'dd-mm-yyyy'
df3['Date3'] = df3['Date'].dt.strftime('%d-%m-%Y')

# Set the new column as the index
df3.set_index('Date3', inplace=True)

# Drop the original 'Date' column if no longer needed
df3.drop(columns='Date', inplace=True)

df3

Unnamed: 0_level_0,Heading,Body,Star Rating
Date3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
02-08-2024,Constantly canceling flights,Kenya airlines canceled my flight mid my journ...,0.0
04-08-2024,Kenya Airways canceled the flight and…,Kenya Airways canceled the flight and never re...,0.0
30-07-2024,If your thinking of Kenya Airlines...think aga...,If your thinking of using this airline...RUN d...,0.0
29-07-2024,Worst airlines ever,"Worst airlines ever, not friendly staff, no fl...",0.0
25-07-2024,We were trying to go to Nampula from Nairobi a...,We were trying to go from Europe through Nairo...,0.0
...,...,...,...
26-01-2017,Better than I expected,Much better service than I had expected - espe...,0.0
30-08-2016,Request for immediate action.,"I have never had a night mare, until I used Ke...",0.0
02-07-2016,Never again,Flew with Kenya airways last year in October.U...,0.0
21-11-2015,NEVER AGAIN - RUBISH COMPANY !!!,We travelled to Zanzibar in October 2015. From...,1.0


In [1851]:
df3['Review'] = df3['Heading'] + ' ' + df3['Body']
df3 = df3.drop(columns= ['Heading', 'Body'])
df3.head(5)


Unnamed: 0_level_0,Star Rating,Review
Date3,Unnamed: 1_level_1,Unnamed: 2_level_1
02-08-2024,0.0,Constantly canceling flights Kenya airlines ca...
04-08-2024,0.0,Kenya Airways canceled the flight and… Kenya A...
30-07-2024,0.0,If your thinking of Kenya Airlines...think aga...
29-07-2024,0.0,"Worst airlines ever Worst airlines ever, not f..."
25-07-2024,0.0,We were trying to go to Nampula from Nairobi a...


In [1852]:
df3.rename(columns={'Date3': 'Date', 'Star Rating': 'Rating'}, inplace=True)
df3.head(5)

Unnamed: 0_level_0,Rating,Review
Date3,Unnamed: 1_level_1,Unnamed: 2_level_1
02-08-2024,0.0,Constantly canceling flights Kenya airlines ca...
04-08-2024,0.0,Kenya Airways canceled the flight and… Kenya A...
30-07-2024,0.0,If your thinking of Kenya Airlines...think aga...
29-07-2024,0.0,"Worst airlines ever Worst airlines ever, not f..."
25-07-2024,0.0,We were trying to go to Nampula from Nairobi a...


## Merging the datasets

In [1853]:
Cleaned_df1.sample(10)

Unnamed: 0_level_0,Departures,Class,Review,Rating_Value,Rating_Description
Date3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-05-01,International,Economy,"Great service, good food and pleasant staff. G...",5.0,Excellent
2022-04-01,International,Economy,"The worst airline, my trip became the worst ni...",1.0,Poor
2019-11-01,International,Business Class,We were booked in economy and used the biding ...,4.0,Good
2020-01-01,Africa,Economy,Went nonstop from ZNZ to NBO and had a good fl...,4.0,Good
2018-02-01,International,Economy,Missed connecting flight due to a few minutes ...,1.0,Poor
2019-07-01,Africa,Economy,Unfortunately we got off to a bad start with K...,3.0,Average
2017-02-01,Africa,Economy,Kenya Airways has better entertainment than mo...,5.0,Excellent
2019-09-01,North Africa,Economy,They have a great time keeping and service exp...,5.0,Excellent
2017-03-01,Africa,Economy,Took a flight from Entebbe to Nairobi and from...,4.0,Good
2018-11-01,International,Economy,Totally loved the flight to Nairobi - the fli...,3.0,Average


In [1854]:
df2.sample(10)

Unnamed: 0_level_0,Review
Date3,Unnamed: 1_level_1
2011-07-25,NBO-FIH. Check-in at the Jomo Kenyatta airport...
2018-06-10,✅ Trip Verified | Johannesburg to Kilimanjaro...
2024-02-24,✅ Trip Verified | I never thought I would exp...
2014-04-01,First time traveller with KA and returning wit...
2012-01-28,From Nairobi to Addis Ababa via Djibouti. I mu...
2011-09-13,Flights delayed. No customer service and poor ...
2021-11-09,✅ Trip Verified | Wonderful service onboard th...
2018-10-27,✅ Trip Verified | Accra to Dar Es Salaam via ...
2014-05-26,MBA-NBO-CAN-NBO-MBA.. 1st leg was an Embraer E...
2021-06-20,✅ Trip Verified | It's our first time booking...


In [1855]:
df3.sample(10)

Flushing oldest 200 entries.
  warn('Output cache limit (currently {sz} entries) hit.\n'


Unnamed: 0_level_0,Rating,Review
Date3,Unnamed: 1_level_1,Unnamed: 2_level_1
09-10-2021,0.0,"Avoid, if possible! One and only experience wi..."
29-08-2022,1.0,Kenya airways is the worst Airline you… Kenya ...
01-01-2024,0.0,"A disaster, delay of 24h without explanation A..."
08-01-2019,0.0,Kenya Airways – Factor in Ransom Payments I wa...
06-07-2019,0.0,"Wonderful experience Wonderful experience, tra..."
14-04-2021,0.0,Mis sold Mis sold. Told we can rebook before 2...
26-07-2023,4.0,Refund We had booked a bussines ticket last No...
03-08-2023,0.0,Disappointing Experience - Inflexible Name Cha...
25-07-2024,0.0,We were trying to go to Nampula from Nairobi a...
24-03-2024,1.0,"Bad airline ,no respect for clients The flight..."


In [1856]:
# Concatenate cleaned_df1 and df2 by rows
concatenated_df = pd.concat([Cleaned_df1, df2], axis=0)

concatenated_df

Unnamed: 0_level_0,Departures,Class,Review,Rating_Value,Rating_Description
Date3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-09-01,Africa,Economy,Recently I travelled for business from Cape To...,4.0,Good
2022-11-01,International,Economy,I want to thanks to Jane for her excellent ser...,5.0,Excellent
2022-05-01,International,Economy,not a bad airline to fly with could do with a ...,4.0,Good
2022-11-01,Africa,Business Class,"Dear Kenya Airways ,\n\nI am on my way to Zanz...",1.0,Poor
2022-11-01,Africa,Economy,"Absolutely appalling airline, never ever use i...",1.0,Poor
...,...,...,...,...,...
2011-07-18,,,JNB-NBO-JNB. This trip showed both the good an...,,
2011-07-17,,,Nairobi-London. Lounge in Nairobi was small an...,,
2011-07-11,,,Nairobi-London. As me and my wife are both whe...,,
2011-07-06,,,LUN-LLW in Economy. Adequate for a 1 hr trip. ...,,


In [1857]:
concatenated_df.rename(columns={'Rating_Value': 'Rating'}, inplace=True)
concatenated_df.head(5)

Unnamed: 0_level_0,Departures,Class,Review,Rating,Rating_Description
Date3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-09-01,Africa,Economy,Recently I travelled for business from Cape To...,4.0,Good
2022-11-01,International,Economy,I want to thanks to Jane for her excellent ser...,5.0,Excellent
2022-05-01,International,Economy,not a bad airline to fly with could do with a ...,4.0,Good
2022-11-01,Africa,Business Class,"Dear Kenya Airways ,\n\nI am on my way to Zanz...",1.0,Poor
2022-11-01,Africa,Economy,"Absolutely appalling airline, never ever use i...",1.0,Poor


In [1858]:
# Concatenate concatenated_df and df3 by rows
Final_df = pd.concat([concatenated_df, df3], axis=0)

Final_df

Unnamed: 0_level_0,Departures,Class,Review,Rating,Rating_Description
Date3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-09-01 00:00:00,Africa,Economy,Recently I travelled for business from Cape To...,4.0,Good
2022-11-01 00:00:00,International,Economy,I want to thanks to Jane for her excellent ser...,5.0,Excellent
2022-05-01 00:00:00,International,Economy,not a bad airline to fly with could do with a ...,4.0,Good
2022-11-01 00:00:00,Africa,Business Class,"Dear Kenya Airways ,\n\nI am on my way to Zanz...",1.0,Poor
2022-11-01 00:00:00,Africa,Economy,"Absolutely appalling airline, never ever use i...",1.0,Poor
...,...,...,...,...,...
26-01-2017,,,Better than I expected Much better service tha...,0.0,
30-08-2016,,,Request for immediate action. I have never had...,0.0,
02-07-2016,,,Never again Flew with Kenya airways last year ...,0.0,
21-11-2015,,,NEVER AGAIN - RUBISH COMPANY !!! We travelled ...,1.0,
