In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Load data
Consumer_Price_Index_Annually = pd.read_csv('Consumer Price Index/Consumer_Price_Index_Annually.csv')

In [None]:
Consumer_Price_Index_Annually.head(10)

In [None]:
Consumer_Price_Index_Annually['DATAFLOW'].unique()

In [None]:
Consumer_Price_Index_Annually['MEASURE'].unique()

In [None]:
Consumer_Price_Index_Annually['TIME_PERIOD'].unique()

In [None]:
Consumer_Price_Index_Annually.info()

In [None]:
# Sort by TIME_PERIOD
Consumer_Price_Index_Annually = Consumer_Price_Index_Annually.sort_values(by="TIME_PERIOD", ascending=True)

In [None]:
Consumer_Price_Index_Annually.head(10)

In [None]:
# Keep only the four columns the annual CPI analysis works with
extracted_data = Consumer_Price_Index_Annually.loc[:, ['MEASURE', 'TIME_PERIOD', 'OBS_VALUE', 'CPI_DIV']]

# Show a heading followed by the first rows of the reduced frame
print("Extracted Data:", extracted_data.head(), sep="\n")

In [None]:



# Basic analysis
# 1. Average OBS_VALUE for every (MEASURE, TIME_PERIOD) pair. All rows that
#    share a measure and period are averaged together (presumably the
#    individual CPI_DIV divisions -- confirm against the data).
average_cpi = extracted_data.groupby(['MEASURE', 'TIME_PERIOD'])['OBS_VALUE'].mean().reset_index()

# 2. Display the trends over time for each measure
print("\nAverage CPI Values by Measure and Time:")
print(average_cpi)

# 3. Plot one line per measure. matplotlib (plt) and seaborn (sns) are imported
#    once in the notebook's first cell rather than here, so the later cells
#    that use them do not depend on this cell having been run.
plt.figure(figsize=(10, 6))
sns.lineplot(data=average_cpi, x='TIME_PERIOD', y='OBS_VALUE', hue='MEASURE', marker='o')
plt.title('CPI Trends by Measure Over Time')
plt.xlabel('Year')
plt.ylabel('CPI Value')
plt.legend(title='Measure')
plt.grid(True)
plt.show()


In [None]:
#########################################################################

In [None]:
# Load data
Consumer_Price_Index_Monthly = pd.read_csv('Consumer Price Index/Consumer_Price_Index_Monthly.csv')

In [None]:
Consumer_Price_Index_Monthly.head(10)

In [None]:
# Sort by TIME_PERIOD
Consumer_Price_Index_Monthly = Consumer_Price_Index_Monthly.sort_values(by="TIME_PERIOD", ascending=True)

In [None]:
Consumer_Price_Index_Monthly.head()

In [None]:
Consumer_Price_Index_Monthly["MEASURE"].unique()

In [None]:


# Extract relevant columns. The explicit copy makes this an independent frame,
# so the TIME_PERIOD conversion below does not write into a slice of
# Consumer_Price_Index_Monthly (which triggers SettingWithCopyWarning).
extracted_monthly_data = Consumer_Price_Index_Monthly[['MEASURE', 'TIME_PERIOD', 'OBS_VALUE', 'CPI_DIV']].copy()

# Convert TIME_PERIOD to a datetime format for proper time-series handling
extracted_monthly_data['TIME_PERIOD'] = pd.to_datetime(extracted_monthly_data['TIME_PERIOD'])

# Display the first few rows of the extracted dataset
print("Extracted Monthly Data:")
print(extracted_monthly_data.head())

# Average OBS_VALUE for every (MEASURE, TIME_PERIOD) pair
monthly_trends = extracted_monthly_data.groupby(['MEASURE', 'TIME_PERIOD'])['OBS_VALUE'].mean().reset_index()

# Plot monthly trends, one line per measure
plt.figure(figsize=(12, 6))
sns.lineplot(data=monthly_trends, x='TIME_PERIOD', y='OBS_VALUE', hue='MEASURE', marker='o')
plt.title('Monthly CPI Trends by Measure')
plt.xlabel('Time Period')
plt.ylabel('CPI Value')
plt.xticks(rotation=45)
plt.legend(title='Measure')
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
#########################################################################

In [None]:
# Load data
Consumer_Price_Index_Quarterly = pd.read_csv('Consumer Price Index/Consumer_Price_Index_Quarterly.csv')

In [None]:
Consumer_Price_Index_Quarterly.head(10)

In [None]:
# Sort by TIME_PERIOD
Consumer_Price_Index_Quarterly = Consumer_Price_Index_Quarterly.sort_values(by="TIME_PERIOD", ascending=True)

In [None]:
Consumer_Price_Index_Quarterly.head(10)

In [None]:
Consumer_Price_Index_Quarterly.info()

In [None]:


# Extract relevant columns. The explicit copy makes this an independent frame,
# so the TIME_PERIOD conversion below does not write into a slice of
# Consumer_Price_Index_Quarterly (which triggers SettingWithCopyWarning).
extracted_quarterly_data = Consumer_Price_Index_Quarterly[['MEASURE', 'TIME_PERIOD', 'OBS_VALUE', 'CPI_DIV']].copy()

# Parse the quarter labels as quarterly periods, then convert each period to a
# timestamp so the values plot on a continuous time axis
extracted_quarterly_data['TIME_PERIOD'] = pd.PeriodIndex(extracted_quarterly_data['TIME_PERIOD'], freq='Q').to_timestamp()

# Display the first few rows of the extracted dataset
print("Extracted Quarterly Data:")
print(extracted_quarterly_data.head())

# Average OBS_VALUE for every (MEASURE, TIME_PERIOD) pair
quarterly_trends = extracted_quarterly_data.groupby(['MEASURE', 'TIME_PERIOD'])['OBS_VALUE'].mean().reset_index()

# Plot quarterly trends, one line per measure
plt.figure(figsize=(12, 6))
sns.lineplot(data=quarterly_trends, x='TIME_PERIOD', y='OBS_VALUE', hue='MEASURE', marker='o')
plt.title('Quarterly CPI Trends by Measure')
plt.xlabel('Time Period')
plt.ylabel('CPI Value (%)')
plt.xticks(rotation=45)
plt.legend(title='Measure')
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
#########################################################################

In [None]:
# Load data
Currency_Strength = pd.read_csv('Currency Strength/AED-USD.csv')

In [None]:
Currency_Strength.head()

In [None]:
# Sort by Date
Currency_Strength = Currency_Strength.sort_values(by="Date", ascending=True)

In [None]:
Currency_Strength.head()

In [None]:

# Convert Date to datetime format, then sort on the parsed values. The earlier
# sort ran on the raw column as loaded (presumably strings -- confirm), which is
# only chronological for ISO-formatted dates. The stable sort keeps the
# existing order of rows that share a date.
Currency_Strength['Date'] = pd.to_datetime(Currency_Strength['Date'])
Currency_Strength = Currency_Strength.sort_values(by='Date', ascending=True, kind='mergesort')

# Extract relevant columns as an independent copy, so columns can be added to
# it later without writing into a slice of Currency_Strength
extracted_currency_data = Currency_Strength[['Date', 'Close', 'Return']].copy()

# Display the first few rows of the extracted data
print("Extracted AED-USD Currency Data:")
print(extracted_currency_data.head())

# Plot the AED-USD Close prices over time
plt.figure(figsize=(12, 6))
plt.plot(extracted_currency_data['Date'], extracted_currency_data['Close'], label='Close Price', color='blue')
plt.title('AED-USD Exchange Rate Over Time')
plt.xlabel('Date')
plt.ylabel('Exchange Rate (AED to USD)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Analyze daily returns
# Rows without a Return value are dropped before plotting and summarising
returns_data = extracted_currency_data.dropna(subset=['Return'])

# Histogram of returns with a kernel density estimate overlaid
plt.figure(figsize=(10, 6))
sns.histplot(returns_data['Return'], bins=50, kde=True, color='green')
plt.title('Histogram of AED-USD Daily Returns')
plt.xlabel('Daily Return (%)')
plt.ylabel('Frequency')
plt.grid(True)
plt.tight_layout()
plt.show()

# Summary statistics for returns
returns_summary = returns_data['Return'].describe()
print("\nSummary Statistics for AED-USD Daily Returns:")
print(returns_summary)


In [None]:
# Extract year from the Date column. assign() returns a new frame, so this
# never writes into a slice of Currency_Strength (SettingWithCopyWarning) and
# the cell can be re-run safely.
extracted_currency_data = extracted_currency_data.assign(
    Year=extracted_currency_data['Date'].dt.year
)

# Per-year statistics of the Return column: mean, median, sum and standard
# deviation, renamed to descriptive column names
annual_returns = (
    extracted_currency_data.groupby('Year')['Return']
    .agg(['mean', 'median', 'sum', 'std'])
    .reset_index()
    .rename(columns={'mean': 'Average_Return', 'median': 'Median_Return', 'sum': 'Cumulative_Return', 'std': 'Volatility'})
)

# Display annual returns summary
print("Annual Returns Summary:")
print(annual_returns)

# Plot average and cumulative returns over the years.
# NOTE(review): 'Cumulative_Return' is the plain sum of the year's returns, not
# a compounded figure -- confirm that is the intended definition.
plt.figure(figsize=(14, 6))
sns.lineplot(data=annual_returns, x='Year', y='Average_Return', label='Average Return', marker='o', color='blue')
sns.lineplot(data=annual_returns, x='Year', y='Cumulative_Return', label='Cumulative Return', marker='o', color='green')
plt.axhline(0, linestyle='--', color='gray', linewidth=1)
plt.title('AED-USD Annual Average and Cumulative Returns')
plt.xlabel('Year')
plt.ylabel('Return (%)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Plot volatility (standard deviation of daily returns)
plt.figure(figsize=(14, 6))
sns.barplot(data=annual_returns, x='Year', y='Volatility', color='orange')
plt.title('Annual Volatility of AED-USD Daily Returns')
plt.xlabel('Year')
plt.ylabel('Volatility (Standard Deviation)')
plt.xticks(rotation=45)
plt.grid(True, axis='y')
plt.tight_layout()
plt.show()


In [None]:
#########################################################################

In [None]:
# Load data
GDP_Quarterly_Constant_Prices = pd.read_csv('Gross Domestic Product/GDP_Quarterly_Constant_Prices.csv')

In [None]:
GDP_Quarterly_Constant_Prices.head(20)

In [None]:
GDP_Quarterly_Constant_Prices.info()

In [None]:
GDP_Quarterly_Constant_Prices['DATAFLOW'].unique()


In [None]:
GDP_Quarterly_Constant_Prices['REF_AREA'].unique()

In [None]:
# Each quarter label is paired with the zero-padded number of its first month
quarter_map = dict(Q1='01', Q2='04', Q3='07', Q4='10')

# Translate the QUARTER labels into month strings
GDP_Quarterly_Constant_Prices['Month'] = GDP_Quarterly_Constant_Prices['QUARTER'].map(quarter_map)

# Assemble 'YYYY-MM-01' strings from year and month, then parse them as dates
GDP_Quarterly_Constant_Prices['Date'] = pd.to_datetime(
    GDP_Quarterly_Constant_Prices['TIME_PERIOD']
    .astype(str)
    .add('-')
    .add(GDP_Quarterly_Constant_Prices['Month'])
    .add('-01')
)

# Now `Date` is a valid datetime object.


In [None]:
# Ensure OBS_VALUE is numeric (unparseable entries become NaN)
GDP_Quarterly_Constant_Prices['OBS_VALUE'] = pd.to_numeric(GDP_Quarterly_Constant_Prices['OBS_VALUE'], errors='coerce')

# Calculate annual GDP by summing over quarters
annual_gdp = (
    GDP_Quarterly_Constant_Prices.groupby('TIME_PERIOD')['OBS_VALUE']
    .sum()
    .reset_index()
    .rename(columns={'OBS_VALUE': 'Annual_GDP'})
)

# Quarter-over-quarter change. The frame is never sorted after loading, so the
# change is computed on a date-ordered view; assigning the result back aligns
# on the row index and leaves the frame's own row order untouched.
GDP_Quarterly_Constant_Prices['Quarterly_Change'] = (
    GDP_Quarterly_Constant_Prices.sort_values('Date', kind='mergesort')['OBS_VALUE'].pct_change() * 100
)

# Year-over-year change of the annual totals. A year with fewer than four
# reported quarters has an understated total, so a growth rate is only kept
# when both the year and the year before it are complete.
quarters_reported = (
    GDP_Quarterly_Constant_Prices.dropna(subset=['OBS_VALUE'])
    .groupby('TIME_PERIOD')['QUARTER']
    .nunique()
)
year_is_complete = annual_gdp['TIME_PERIOD'].map(quarters_reported).fillna(0).eq(4)
both_years_complete = year_is_complete & year_is_complete.shift(1, fill_value=False)
annual_gdp['Annual_Change'] = (annual_gdp['Annual_GDP'].pct_change() * 100).where(both_years_complete)

# Visualization: Quarterly GDP Trends
plt.figure(figsize=(14, 6))
sns.lineplot(data=GDP_Quarterly_Constant_Prices, x='Date', y='OBS_VALUE', marker='o', label='Quarterly GDP', color='blue')
plt.title('Quarterly GDP Trends (Constant Prices)')
plt.xlabel('Date')
plt.ylabel('GDP (Million AED)')
plt.grid(True)
plt.tight_layout()
plt.show()

# Visualization: Annual GDP Growth
plt.figure(figsize=(14, 6))
sns.barplot(data=annual_gdp, x='TIME_PERIOD', y='Annual_Change', palette='viridis')
plt.axhline(0, color='gray', linestyle='--', linewidth=1)
plt.title('Annual GDP Growth Rate (%)')
plt.xlabel('Year')
plt.ylabel('Growth Rate (%)')
plt.xticks(rotation=45)
plt.grid(True, axis='y')
plt.tight_layout()
plt.show()


In [None]:
# Check 2024 data
print(GDP_Quarterly_Constant_Prices[GDP_Quarterly_Constant_Prices['TIME_PERIOD'] == 2024])


In [None]:
# Check for missing quarters by year.
# Despite its name, `missing_quarters` holds the number of DISTINCT quarters
# present for each year; any year with a count below 4 is incomplete.
missing_quarters = GDP_Quarterly_Constant_Prices.groupby('TIME_PERIOD')['QUARTER'].nunique()
print(missing_quarters)


In [None]:
# Verify the annual GDP sum
print(GDP_Quarterly_Constant_Prices[GDP_Quarterly_Constant_Prices['TIME_PERIOD'] == 2024][['QUARTER', 'OBS_VALUE']])
print(annual_gdp[annual_gdp['TIME_PERIOD'] == 2024])


In [None]:
# Inspect the growth rate calculation
print(annual_gdp[['TIME_PERIOD', 'Annual_GDP', 'Annual_Change']])


In [None]:
# Restrict the quarterly series to the years 2023 and 2024
filtered_data = GDP_Quarterly_Constant_Prices.loc[
    GDP_Quarterly_Constant_Prices['TIME_PERIOD'].isin([2023, 2024])
]

# One line per year, coloured by TIME_PERIOD
plt.figure(figsize=(14, 6))
sns.lineplot(
    x='Date',
    y='OBS_VALUE',
    hue='TIME_PERIOD',
    data=filtered_data,
    palette='Set2',
    marker='o',
).set(
    title='Quarterly GDP Trends for 2023 and 2024',
    xlabel='Date',
    ylabel='GDP (Million AED)',
)
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
#########################################################################

In [None]:
# Load data
GDP_Quarterly_Current_Prices = pd.read_csv('Gross Domestic Product/GDP_Quarterly_Current_Prices.csv')

In [None]:
GDP_Quarterly_Current_Prices.head(10)

In [None]:
# Map quarter to the first month of the quarter
quarter_months = {
    'Q1': '01', 'Q2': '04', 'Q3': '07', 'Q4': '10'
}

# Create a new column for Date by combining TIME_PERIOD and QUARTER.
# At this point 'Date' is still a 'YYYY-MM' string.
GDP_Quarterly_Current_Prices['Date'] = GDP_Quarterly_Current_Prices['TIME_PERIOD'].astype(str) + '-' + GDP_Quarterly_Current_Prices['QUARTER'].map(quarter_months)

# Convert the Date column to datetime format (set it to the 1st of the month)
GDP_Quarterly_Current_Prices['Date'] = pd.to_datetime(GDP_Quarterly_Current_Prices['Date'], format='%Y-%m')

# Ensure OBS_VALUE is numeric (unparseable entries become NaN)
GDP_Quarterly_Current_Prices['OBS_VALUE'] = pd.to_numeric(GDP_Quarterly_Current_Prices['OBS_VALUE'], errors='coerce')

# Visualize Growth Rate Trends over Time
# NOTE(review): labels below treat OBS_VALUE as a growth rate in percent; the
# file name suggests GDP at current prices -- confirm what OBS_VALUE measures.
plt.figure(figsize=(14, 6))
sns.lineplot(data=GDP_Quarterly_Current_Prices, x='Date', y='OBS_VALUE', marker='o', label='Quarterly GDP Growth Rate', color='green')
plt.title('Quarterly GDP Growth Rate (Current Prices)')
plt.xlabel('Date')
plt.ylabel('Growth Rate (%)')
plt.grid(True)
plt.tight_layout()
plt.show()

# Calculate annual growth rate by summing the growth rate over each year.
# transform('sum') writes the yearly sum onto every row of that year, so each
# year's value is repeated once per quarter row.
# NOTE(review): if OBS_VALUE really is a rate, summing quarterly rates only
# approximates annual growth, and years with fewer quarters sum fewer values.
GDP_Quarterly_Current_Prices['Annual_Growth'] = GDP_Quarterly_Current_Prices.groupby('TIME_PERIOD')['OBS_VALUE'].transform('sum')

# Visualize Annual Growth Rate Trends
# The bar plot receives the repeated per-row values; all rows of a year carry
# the same number, so each bar shows that yearly sum.
plt.figure(figsize=(14, 6))
sns.barplot(data=GDP_Quarterly_Current_Prices, x='TIME_PERIOD', y='Annual_Growth', palette='Blues')
plt.axhline(0, color='gray', linestyle='--', linewidth=1)
plt.title('Annual GDP Growth Rate (Current Prices)')
plt.xlabel('Year')
plt.ylabel('Growth Rate (%)')
plt.xticks(rotation=45)
plt.grid(True, axis='y')
plt.tight_layout()
plt.show()


In [None]:
#########################################################################

In [None]:
# Load data
Population_Gender = pd.read_csv('Population/Population_Estimates_and_Growth_by_Gender.csv')

In [None]:
Population_Gender.head(10)

In [None]:
# Male, Female, and Total Population Trend Over Time
# Rows for each gender (kept under these names because the next cell reuses them)
male_population = Population_Gender[Population_Gender['GENDER'] == 'M']
female_population = Population_Gender[Population_Gender['GENDER'] == 'F']

# Sum OBS_VALUE per year for each gender. The two filtered frames have
# disjoint row indices, so adding their OBS_VALUE columns directly aligns on
# row labels and yields only NaN; indexing both by TIME_PERIOD makes the
# addition line up year by year. A year missing one gender gets a NaN total.
male_by_year = male_population.groupby('TIME_PERIOD')['OBS_VALUE'].sum()
female_by_year = female_population.groupby('TIME_PERIOD')['OBS_VALUE'].sum()
total_by_year = male_by_year + female_by_year

# Plotting Male, Female, and Total Population Trends
plt.figure(figsize=(14, 6))
plt.plot(male_by_year.index, male_by_year.values, label='Male Population', color='blue')
plt.plot(female_by_year.index, female_by_year.values, label='Female Population', color='pink')
plt.plot(total_by_year.index, total_by_year.values, label='Total Population', color='green', linestyle='--')

plt.title('Male, Female, and Total Population Trend Over Time')
plt.xlabel('Year')
plt.ylabel('Population')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Gender Distribution Across Years
# Yearly population per gender
male_population_sum = male_population.groupby('TIME_PERIOD')['OBS_VALUE'].sum()
female_population_sum = female_population.groupby('TIME_PERIOD')['OBS_VALUE'].sum()

# The total is built from the male and female series only. Summing every row
# of Population_Gender would also count any other GENDER category present in
# the file (for example a pre-computed total row -- presumably possible,
# verify with Population_Gender['GENDER'].unique()), which would distort the
# shares. fill_value=0 keeps a year that has data for only one gender.
total_population = male_population_sum.add(female_population_sum, fill_value=0)

# Share of each gender in the yearly total, in percent
gender_distribution = pd.DataFrame({
    'Male Percentage': (male_population_sum / total_population) * 100,
    'Female Percentage': (female_population_sum / total_population) * 100
})

# Plotting Gender Distribution (the frame's index holds the years)
plt.figure(figsize=(14, 6))
sns.lineplot(data=gender_distribution, x=gender_distribution.index, y='Male Percentage', label='Male Percentage', color='blue')
sns.lineplot(data=gender_distribution, x=gender_distribution.index, y='Female Percentage', label='Female Percentage', color='pink')

plt.title('Gender Distribution Across Years')
plt.xlabel('Year')
plt.ylabel('Percentage')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Annual Growth Rate of Population
# Percentage change of the yearly total relative to the preceding entry
# (the first entry has no predecessor and is NaN)
population_growth = total_population.pct_change().mul(100)

# One bar per year, with a dashed reference line at zero growth
plt.figure(figsize=(14, 6))
sns.barplot(
    x=population_growth.index,
    y=population_growth.values,
    palette='viridis',
).set(
    title='Annual Growth Rate of Population',
    xlabel='Year',
    ylabel='Growth Rate (%)',
)
plt.axhline(0, color='gray', linestyle='--', linewidth=1)
plt.xticks(rotation=45)
plt.grid(True, axis='y')
plt.tight_layout()
plt.show()


In [None]:
#########################################################################

In [None]:
# Load data
Population_Indicators = pd.read_csv('Population/Population_Indicators.csv')

In [None]:
Population_Indicators.head(20)

In [None]:
Population_Indicators.info()

In [None]:
Population_Indicators.isnull().sum()  # Check for missing data


In [None]:
print(Population_Indicators.columns)


In [None]:
# Pivot the data to make each population indicator a separate column.
# The result replaces the long-format frame under the same name, so the pivot
# is only applied while the long-format 'POP_IND' column still exists;
# re-running this cell after the pivot is then a no-op instead of a KeyError.
if 'POP_IND' in Population_Indicators.columns:
    Population_Indicators = Population_Indicators.pivot(index='TIME_PERIOD', columns='POP_IND', values='OBS_VALUE').reset_index()


In [None]:
print(Population_Indicators.head())


In [None]:
print(Population_Indicators['TIME_PERIOD'].dtype)
print(Population_Indicators['TIME_PERIOD'].unique())


In [None]:
Population_Indicators['TIME_PERIOD'] = Population_Indicators['TIME_PERIOD'].astype(int)


In [None]:
Population_Indicators['TIME_PERIOD'] = pd.to_numeric(Population_Indicators['TIME_PERIOD'], errors='coerce')


In [None]:
plt.figure(figsize=(14, 6))

# Plot the lines (MED, CDR and LEB are columns created by the pivot above)
sns.lineplot(data=Population_Indicators, x='TIME_PERIOD', y='MED', label='Median Age')
sns.lineplot(data=Population_Indicators, x='TIME_PERIOD', y='CDR', label='Crude Death Rate')
sns.lineplot(data=Population_Indicators, x='TIME_PERIOD', y='LEB', label='Life Expectancy (Total)')

# Take the x-axis ticks and the year span in the title from the data, so the
# plot stays correct if the file covers years other than 2019 and 2020
plot_years = sorted(int(year) for year in Population_Indicators['TIME_PERIOD'].dropna().unique())
year_span = f' ({plot_years[0]}-{plot_years[-1]})' if plot_years else ''
plt.xticks(ticks=plot_years, labels=[str(year) for year in plot_years], rotation=45)

# Customize the plot
plt.title(f'Population Indicators Over Time{year_span}')
plt.xlabel('Year')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
#########################################################################

In [None]:
# Load data
Guests_by_Hotel_Type_by_Region = pd.read_csv('Tourism/Guests_by_Hotel_Type_by_Region.csv')

In [None]:
Guests_by_Hotel_Type_by_Region.head(10)

In [None]:
Guests_by_Hotel_Type_by_Region.info()

In [None]:
Guests_by_Hotel_Type_by_Region['TIME_PERIOD'].unique()

In [None]:
# Guest Trends Over Time
# A single line over every row of the frame, with OBS_VALUE against TIME_PERIOD
plt.figure(figsize=(14, 6))
sns.lineplot(
    x='TIME_PERIOD',
    y='OBS_VALUE',
    data=Guests_by_Hotel_Type_by_Region,
    color='blue',
    marker='o',
    label='Number of Guests',
).set(
    title='Guest Trends Over Time (1979-2022)',
    xlabel='Year',
    ylabel='Number of Guests',
)
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Region-based Guest Trends
# Same plot as the overall trend, split into one coloured line per GUEST_REGION
plt.figure(figsize=(14, 6))
sns.lineplot(
    x='TIME_PERIOD',
    y='OBS_VALUE',
    hue='GUEST_REGION',
    data=Guests_by_Hotel_Type_by_Region,
    marker='o',
).set(
    title='Guest Trends by Region Over Time',
    xlabel='Year',
    ylabel='Number of Guests',
)
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
Guests_by_Hotel_Type_by_Region['GUEST_REGION'].unique()

In [None]:
'''
UAE – United Arab Emirates (domestic guests)
GCC – Gulf Cooperation Council (Saudi Arabia, Kuwait, Bahrain, Qatar, Oman, UAE)
OC – Oceania (Australia, New Zealand, Pacific Islands)
AF – Africa
ASC – Asia Subcontinent (likely South Asia: India, Pakistan, Bangladesh, Sri Lanka, Nepal)
AC – Possibly "Asia & Caucasus" or "American Continent"
ACAF – Asia, Caucasus, and Africa (a combined classification)
EC – Possibly "Europe & CIS" (Commonwealth of Independent States, including Russia, Ukraine, etc.)
AM – Americas (North & South America)
OTH – Others (regions that don’t fit into the predefined categories)
'''

In [None]:
# Yearly Change in Guests
# The frame is long-format (many rows per year, e.g. one per GUEST_REGION), so
# a row-by-row pct_change compares unrelated neighbouring rows. Guests are
# first totalled per year, and the change is taken between consecutive years.
# NOTE(review): if the file holds both breakdown rows and pre-aggregated
# total rows, the yearly sum counts guests twice -- confirm the row structure.
yearly_guests = Guests_by_Hotel_Type_by_Region.groupby('TIME_PERIOD')['OBS_VALUE'].sum().sort_index()
yearly_change = yearly_guests.pct_change() * 100

# Keep the per-row column: every row carries the change of its own year
Guests_by_Hotel_Type_by_Region['Yearly_Change'] = Guests_by_Hotel_Type_by_Region['TIME_PERIOD'].map(yearly_change)

plt.figure(figsize=(14, 6))
sns.barplot(x=yearly_change.index, y=yearly_change.values, palette='viridis')
plt.axhline(0, color='gray', linestyle='--', linewidth=1)
plt.title('Yearly Change in Number of Guests (%)')
plt.xlabel('Year')
plt.ylabel('Change in Guests (%)')
plt.xticks(rotation=90)
plt.grid(True, axis='y')
plt.tight_layout()
plt.show()


In [None]:
#########################################################################

In [None]:
# Load data
Hotel_Establishments_and_Rooms_by_Rating_Type = pd.read_csv('Tourism/Hotel_Establishments_and_Rooms_by_Rating_Type.csv')

In [None]:
Hotel_Establishments_and_Rooms_by_Rating_Type.head(20)

In [None]:
Hotel_Establishments_and_Rooms_by_Rating_Type['H_INDICATOR'].unique()

In [None]:
Hotel_Establishments_and_Rooms_by_Rating_Type['GUEST_REGION'].unique()

In [None]:
Hotel_Establishments_and_Rooms_by_Rating_Type.info()

In [None]:
Hotel_Establishments_and_Rooms_by_Rating_Type['TIME_PERIOD'].unique()

In [None]:
# Hotel Establishments Over Time
# Only rows whose H_INDICATOR is 'EST' are plotted
plt.figure(figsize=(14, 6))
sns.lineplot(
    x='TIME_PERIOD',
    y='OBS_VALUE',
    data=Hotel_Establishments_and_Rooms_by_Rating_Type.loc[
        Hotel_Establishments_and_Rooms_by_Rating_Type['H_INDICATOR'].eq('EST')
    ],
    color='blue',
    marker='o',
).set(
    title='Hotel Establishments Over Time (1975-2022)',
    xlabel='Year',
    ylabel='Number of Hotel Establishments',
)
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Rooms Over Time
# Only rows whose H_INDICATOR is 'ROOM' are plotted, as a single line
# (no split by rating type is applied)
plt.figure(figsize=(14, 6))
sns.lineplot(
    x='TIME_PERIOD',
    y='OBS_VALUE',
    data=Hotel_Establishments_and_Rooms_by_Rating_Type.loc[
        Hotel_Establishments_and_Rooms_by_Rating_Type['H_INDICATOR'].eq('ROOM')
    ],
    color='green',
    marker='o',
).set(
    title='Rooms Over Time (1975-2022)',
    xlabel='Year',
    ylabel='Number of Rooms',
)
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Calculate annual change for Hotel Establishments and Rooms.
# The frame mixes several H_INDICATOR values (and possibly several rows per
# year), so a row-by-row pct_change compares unrelated neighbouring rows.
# Values are first totalled per (indicator, year); the change is then taken
# between consecutive years inside each indicator.
# NOTE(review): if the file holds both per-rating rows and pre-aggregated
# totals, the sum double counts -- confirm the row structure.
series_keys = ['H_INDICATOR', 'TIME_PERIOD']
hotel_totals = (
    Hotel_Establishments_and_Rooms_by_Rating_Type
    .groupby(series_keys, as_index=False)['OBS_VALUE']
    .sum()
    .sort_values(series_keys)
)
hotel_totals['Annual_Change'] = hotel_totals.groupby('H_INDICATOR')['OBS_VALUE'].pct_change() * 100

# Keep the per-row column: every row carries the change of its own
# (indicator, year). A left merge preserves the row order of the left frame,
# so the values can be assigned back positionally.
Hotel_Establishments_and_Rooms_by_Rating_Type['Annual_Change'] = (
    Hotel_Establishments_and_Rooms_by_Rating_Type[series_keys]
    .merge(hotel_totals[series_keys + ['Annual_Change']], on=series_keys, how='left')['Annual_Change']
    .to_numpy()
)

plt.figure(figsize=(14, 6))
sns.lineplot(data=hotel_totals[hotel_totals['H_INDICATOR'] == 'EST'],
             x='TIME_PERIOD', y='Annual_Change', marker='o', label='Hotel Establishments')
sns.lineplot(data=hotel_totals[hotel_totals['H_INDICATOR'] == 'ROOM'],
             x='TIME_PERIOD', y='Annual_Change', marker='o', label='Rooms')
plt.title('Annual Change in Hotel Establishments and Rooms')
plt.xlabel('Year')
plt.ylabel('Annual Change (%)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
#########################################################################

In [None]:
# Load data
Hotel_Establishments_Main_Indicators = pd.read_csv('Tourism/Hotel_Establishments_Main_Indicators.csv')

In [None]:
Hotel_Establishments_Main_Indicators.head(10)

In [None]:
Hotel_Establishments_Main_Indicators['H_INDICATOR'].unique()

In [None]:
'''
RR - Total Revenue (likely the most important indicator)
TOR - Total Occupancy Rate (percentage of rooms occupied)
GUN - Gross Utilization Number (likely measures the occupancy or usage rate of hotel resources)
LS - Length of Stay (average duration of stay for guests)
FB - Food & Beverage Revenue (likely indicates revenue from dining services)
AR - Average Room Rate (the average price of a room per night)
OR - Occupancy Rate (similar to TOR, but could be more specific in context)
TR - Total Rooms (total number of rooms in the establishment)
TAR - Total Available Rooms (could indicate the number of rooms available for booking)
ARR - Average Room Revenue (average revenue per room)
'''

In [None]:
Hotel_Establishments_Main_Indicators['TIME_PERIOD'].unique()

In [None]:
# Revenue per Room (RR) Over Time
# Only rows whose H_INDICATOR is 'RR' are plotted
plt.figure(figsize=(14, 6))
sns.lineplot(
    x='TIME_PERIOD',
    y='OBS_VALUE',
    data=Hotel_Establishments_Main_Indicators.loc[
        Hotel_Establishments_Main_Indicators['H_INDICATOR'].eq('RR')
    ],
    color='purple',
    marker='o',
).set(
    title='Revenue Per Room (RR) Over Time (1985-2022)',
    xlabel='Year',
    ylabel='Revenue Per Room (AED)',
)
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Calculate annual change in Revenue Per Room.
# The frame holds many indicators, so a row-by-row pct_change would compare an
# RR value with whatever indicator sits on the previous row. The change is
# computed on the RR series alone, ordered by year (rows of the same year are
# averaged), and written back only onto the RR rows.
is_rr = Hotel_Establishments_Main_Indicators['H_INDICATOR'] == 'RR'
rr_by_year = (
    Hotel_Establishments_Main_Indicators.loc[is_rr]
    .groupby('TIME_PERIOD')['OBS_VALUE']
    .mean()
    .sort_index()
)
rr_annual_change = rr_by_year.pct_change() * 100
Hotel_Establishments_Main_Indicators['Annual_Change_RR'] = (
    Hotel_Establishments_Main_Indicators['TIME_PERIOD'].map(rr_annual_change).where(is_rr)
)

plt.figure(figsize=(14, 6))
sns.lineplot(data=Hotel_Establishments_Main_Indicators[Hotel_Establishments_Main_Indicators['H_INDICATOR'] == 'RR'],
             x='TIME_PERIOD', y='Annual_Change_RR', marker='o', color='orange')
plt.title('Annual Change in Revenue Per Room (1985-2022)')
plt.xlabel('Year')
plt.ylabel('Annual Change (%)')
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Plot RR against the occupancy-rate indicators (TOR, OR), one line each.
# marker='o' is used because seaborn's markers=True only has an effect
# together with a style= variable, so the original call drew no markers.
# NOTE(review): the indicators are drawn on one shared y-axis; if their units
# differ, the smaller series will look flat.
plt.figure(figsize=(14, 6))
sns.lineplot(data=Hotel_Establishments_Main_Indicators[Hotel_Establishments_Main_Indicators['H_INDICATOR'].isin(['RR', 'TOR', 'OR'])],
             x='TIME_PERIOD', y='OBS_VALUE', hue='H_INDICATOR', marker='o')
plt.title('Total Revenue and Occupancy Rate Over Time (1985-2022)')
plt.xlabel('Year')
plt.ylabel('Value')
plt.legend(title='Indicator')
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Compare AR and ARR, one line per indicator.
# marker='o' is used because seaborn's markers=True only has an effect
# together with a style= variable, so the original call drew no markers.
plt.figure(figsize=(14, 6))
sns.lineplot(data=Hotel_Establishments_Main_Indicators[Hotel_Establishments_Main_Indicators['H_INDICATOR'].isin(['AR', 'ARR'])],
             x='TIME_PERIOD', y='OBS_VALUE', hue='H_INDICATOR', marker='o')
plt.title('Average Room Rate vs Average Room Revenue (2015-2022)')
plt.xlabel('Year')
plt.ylabel('Value')
plt.legend(title='Indicator')
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Plot LS against the occupancy-rate indicators (TOR, OR), one line each.
# marker='o' is used because seaborn's markers=True only has an effect
# together with a style= variable, so the original call drew no markers.
plt.figure(figsize=(14, 6))
sns.lineplot(data=Hotel_Establishments_Main_Indicators[Hotel_Establishments_Main_Indicators['H_INDICATOR'].isin(['LS', 'TOR', 'OR'])],
             x='TIME_PERIOD', y='OBS_VALUE', hue='H_INDICATOR', marker='o')
plt.title('Length of Stay vs Occupancy Rate (1985-2022)')
plt.xlabel('Year')
plt.ylabel('Value')
plt.legend(title='Indicator')
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Compare ARR and TOR, one line per indicator.
# marker='o' is used because seaborn's markers=True only has an effect
# together with a style= variable, so the original call drew no markers.
plt.figure(figsize=(14, 6))
sns.lineplot(data=Hotel_Establishments_Main_Indicators[Hotel_Establishments_Main_Indicators['H_INDICATOR'].isin(['ARR', 'TOR'])],
             x='TIME_PERIOD', y='OBS_VALUE', hue='H_INDICATOR', marker='o')
plt.title('Revenue Per Room vs Occupancy Rate (2015-2022)')
plt.xlabel('Year')
plt.ylabel('Value')
plt.legend(title='Indicator')
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Compare the RR and FB indicators, one line each.
# marker='o' is used because seaborn's markers=True only has an effect
# together with a style= variable, so the original call drew no markers.
plt.figure(figsize=(14, 6))
sns.lineplot(data=Hotel_Establishments_Main_Indicators[Hotel_Establishments_Main_Indicators['H_INDICATOR'].isin(['RR', 'FB'])],
             x='TIME_PERIOD', y='OBS_VALUE', hue='H_INDICATOR', marker='o')
plt.title('Total Revenue vs Food & Beverage Revenue (1985-2022)')
plt.xlabel('Year')
plt.ylabel('Revenue (AED)')
plt.legend(title='Indicator')
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Compare the LS and RR indicators, one line each.
# marker='o' is used because seaborn's markers=True only has an effect
# together with a style= variable, so the original call drew no markers.
plt.figure(figsize=(14, 6))
sns.lineplot(data=Hotel_Establishments_Main_Indicators[Hotel_Establishments_Main_Indicators['H_INDICATOR'].isin(['LS', 'RR'])],
             x='TIME_PERIOD', y='OBS_VALUE', hue='H_INDICATOR', marker='o')
plt.title('Length of Stay vs Total Revenue (1985-2022)')
plt.xlabel('Year')
plt.ylabel('Value')
plt.legend(title='Indicator')
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Year-on-Year Comparison of Revenue Per Room.
# shift(1) on the whole frame would pair each RR value with whatever indicator
# sits on the previous row. The previous-year value is looked up from the RR
# series alone, ordered by year (rows of the same year are averaged), and both
# derived columns are filled only on RR rows.
is_rr = Hotel_Establishments_Main_Indicators['H_INDICATOR'] == 'RR'
rr_by_year = (
    Hotel_Establishments_Main_Indicators.loc[is_rr]
    .groupby('TIME_PERIOD')['OBS_VALUE']
    .mean()
    .sort_index()
)
# NOTE(review): shift(1) takes the preceding year PRESENT in the data; with
# gaps in the years this is not necessarily the calendar year before.
previous_rr_by_year = rr_by_year.shift(1)
Hotel_Establishments_Main_Indicators['Previous_RR'] = (
    Hotel_Establishments_Main_Indicators['TIME_PERIOD'].map(previous_rr_by_year).where(is_rr)
)
Hotel_Establishments_Main_Indicators['YoY_RR_Comparison'] = (
    Hotel_Establishments_Main_Indicators['OBS_VALUE'] - Hotel_Establishments_Main_Indicators['Previous_RR']
).where(is_rr)

plt.figure(figsize=(14, 6))
sns.barplot(data=Hotel_Establishments_Main_Indicators[Hotel_Establishments_Main_Indicators['H_INDICATOR'] == 'RR'],
            x='TIME_PERIOD', y='YoY_RR_Comparison', color='green')
plt.title('Year-on-Year Revenue Per Room Comparison (1985-2022)')
plt.xlabel('Year')
plt.ylabel('Difference in Revenue Per Room (AED)')
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Distribution of Revenue Per Room
# One box per TIME_PERIOD, built from the rows whose H_INDICATOR is 'RR'
plt.figure(figsize=(14, 6))
sns.boxplot(
    x='TIME_PERIOD',
    y='OBS_VALUE',
    data=Hotel_Establishments_Main_Indicators.loc[
        Hotel_Establishments_Main_Indicators['H_INDICATOR'].eq('RR')
    ],
    palette='Set2',
).set(
    title='Distribution of Revenue Per Room (1985-2022)',
    xlabel='Year',
    ylabel='Revenue Per Room (AED)',
)
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Correlation heatmap between five hotel indicators
# Keep only the rows of the indicators being compared
selected_indicators = Hotel_Establishments_Main_Indicators.loc[
    Hotel_Establishments_Main_Indicators['H_INDICATOR'].isin(['RR', 'AR', 'FB', 'TOR', 'LS'])
]

# Reshape to one row per year and one column per indicator, then correlate the columns
pivot_table = pd.pivot_table(
    selected_indicators,
    values='OBS_VALUE',
    index='TIME_PERIOD',
    columns='H_INDICATOR',
)
corr_matrix = pivot_table.corr()

# Annotated heatmap of the correlation coefficients
plt.figure(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    linewidths=0.5,
).set_title('Correlation Heatmap Between Hotel Indicators')
plt.tight_layout()
plt.show()


In [None]:
#########################################################################

In [None]:
# Load data
Metadata_World_Development_Indicator = pd.read_csv('World Development Indicators/Metadata_World_Development_Indicator.csv')

In [None]:
Metadata_World_Development_Indicator.head(10)

In [None]:
Metadata_World_Development_Indicator.info()

In [None]:
Metadata_World_Development_Indicator['INDICATOR_NAME'].unique()

In [None]:
Metadata_World_Development_Indicator['SOURCE_NOTE'].unique()

In [None]:
Metadata_World_Development_Indicator['SOURCE_ORGANIZATION'].unique()

In [None]:
# Checking the column names to verify 'TIME_PERIOD'
print(Metadata_World_Development_Indicator.columns)


In [None]:
#########################################################################

In [None]:
# Load data
# The first 4 lines of the file are skipped before the header row is read
# (presumably a metadata preamble in the export -- confirm against the file).
World_Development_Indicator = pd.read_csv('World Development Indicators/World_Development_Indicator.csv',
                                          skiprows=4,            # Skip the first 4 rows
                                          delimiter=",",         # Specify the delimiter
                                          quotechar='"',         # Handle quoted fields
                                          engine="python"        # Use Python engine for flexibility
                                         )

In [None]:
World_Development_Indicator.head()

In [None]:
World_Development_Indicator.info()

In [None]:
World_Development_Indicator["Country Name"].unique()

In [None]:
World_Development_Indicator["Country Code"].unique()

In [None]:
World_Development_Indicator["Indicator Name"].unique()

In [None]:
World_Development_Indicator["Indicator Code"].unique()

In [None]:
# Filter the data for the specific country (e.g., United Arab Emirates)
uae_data = World_Development_Indicator[World_Development_Indicator['Country Name'] == 'United Arab Emirates']

# Select the columns from 1960 to 2023 for plotting (one column per year)
uae_data_years = uae_data.loc[:, '1960':'2023']

# One figure per indicator row; the rows of uae_data and uae_data_years line up
for indicator_name, indicator_values in zip(uae_data['Indicator Name'], uae_data_years.values):
    # Mask of the years that actually carry a value (computed once per row)
    has_value = ~pd.isna(indicator_values)

    # Skip indicators with no data at all
    if not has_value.any():
        continue

    valid_years = uae_data_years.columns[has_value]
    valid_values = indicator_values[has_value]

    # Plot the data
    fig = plt.figure(figsize=(14, 6))
    plt.plot(valid_years, valid_values, marker='o', label=indicator_name)
    plt.title(f'Trend of {indicator_name} for United Arab Emirates (1960-2023)', fontsize=14)
    plt.xlabel('Year', fontsize=12)
    plt.ylabel('Value', fontsize=12)
    plt.xticks(rotation=90, fontsize=10)
    plt.yticks(fontsize=10)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.legend(loc='upper left', fontsize=10)
    plt.show()
    # Release the figure explicitly: this loop creates one figure per
    # indicator, and open figures accumulate in memory otherwise
    plt.close(fig)


In [None]:
# Get unique Indicator Names (in order of first appearance in the frame)
indicator_names = World_Development_Indicator['Indicator Name'].unique()

# Display all unique indicator names as a numbered list starting at 1
for i, name in enumerate(indicator_names, 1):
    print(f"{i}. {name}")


In [None]:
# Define keywords for filtering indicators
keywords = ['employment', 'GDP', 'education', 'migration', 'health', 'unemployment']

# Suggest indicators matching the keywords.
# The keywords are joined with '|' into one regular expression, so a name
# matches if it contains ANY keyword; case=False ignores letter case and
# na=False treats missing names as non-matching.
suggested_indicators = World_Development_Indicator[
    World_Development_Indicator['Indicator Name']
    .str.contains('|'.join(keywords), case=False, na=False)
]['Indicator Name'].unique()

# Display the suggested indicators as a numbered list starting at 1
for i, name in enumerate(suggested_indicators, 1):
    print(f"{i}. {name}")


In [None]:
# Keyword lists per theme; an indicator belongs to a theme when its name
# contains any of the theme's keywords (case-insensitive)
themes = {
    "Economy": ['GDP', 'income', 'inflation', 'trade'],
    "Employment": ['employment', 'labor', 'unemployment', 'wage'],
    "Education": ['literacy', 'school', 'enrollment', 'education'],
    "Health": ['life expectancy', 'mortality', 'birth', 'death'],
    "Migration": ['population', 'migrant', 'migration', 'urban']
}

# Loop-local names are used on purpose: reusing `keywords` and
# `suggested_indicators` here would silently overwrite the variables the
# previous cell defined, leaving them set to the last theme's values.
for theme, theme_keywords in themes.items():
    print(f"\n--- {theme} Indicators ---")
    theme_pattern = '|'.join(theme_keywords)  # regex alternation: match any keyword
    theme_indicators = World_Development_Indicator.loc[
        World_Development_Indicator['Indicator Name'].str.contains(theme_pattern, case=False, na=False),
        'Indicator Name',
    ].unique()

    for i, name in enumerate(theme_indicators, 1):
        print(f"{i}. {name}")


In [None]:
# Count non-missing values for each indicator.
# The explicit copy makes uae_data an independent frame, so adding the count
# column below does not write into a slice of World_Development_Indicator
# (which triggers SettingWithCopyWarning).
uae_data = World_Development_Indicator[
    World_Development_Indicator['Country Name'] == 'United Arab Emirates'
].copy()

# Number of year columns (1960 through 2023) that hold a value, per row
uae_data['NonMissingCount'] = uae_data.loc[:, '1960':'2023'].notna().sum(axis=1)

# Sort indicators by data availability, best-covered first
top_indicators = uae_data.sort_values('NonMissingCount', ascending=False)

# Display the top 20 indicators
print(top_indicators[['Indicator Name', 'NonMissingCount']].head(20))
