In [None]:
# pip install adjustText

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the semicolon-separated rents export into a DataFrame.
# low_memory=False is passed through to pandas' CSV parser unchanged.
Rents = pd.read_csv('Rents & Transactions/rents.csv', sep=';', low_memory=False)

In [None]:
# Print a summary of the Rents frame (columns, dtypes, non-null counts).
Rents.info()

In [None]:
# Show the distinct raw values of the 'Area' column.
Rents['Area'].unique()

In [None]:
# Function to normalize area names
def normalize_area(Area):
    """Return a canonical spelling of an area name.

    The value is upper-cased and every run of whitespace (including leading
    and trailing whitespace) is collapsed to a single space.

    Parameters
    ----------
    Area : str or scalar
        Raw area name. Non-string, non-missing scalars are converted with
        ``str()`` first so that a stray numeric value does not raise
        ``AttributeError``.

    Returns
    -------
    str or None
        The normalized name, or ``None`` when ``Area`` is missing
        (``None`` / ``NaN``).
    """
    if pd.isna(Area):
        return None
    # str.split() with no arguments already ignores leading/trailing
    # whitespace, so a separate strip() is unnecessary.
    return " ".join(str(Area).upper().split())

# Replace every area name with its normalized form (element-wise).
Rents["Area"] = Rents["Area"].map(normalize_area)

In [None]:
# Show the distinct 'Area' values again, now after normalization.
Rents['Area'].unique()

In [None]:
# Load the Kaggle dataset containing area coordinates.
# Later cells read 'area' (renamed to 'Area'), 'lat' and 'lon' from this frame.
coordinates_df = pd.read_excel('Final - Dubai Areas.xlsx')


In [None]:
# Print a summary of the coordinates frame (columns, dtypes, non-null counts).
coordinates_df.info()

In [None]:
# Align the join-key column name with Rents ('area' -> 'Area').
coordinates_df = coordinates_df.rename(columns={'area': 'Area'})


In [None]:
# Show the distinct area names available in the coordinates frame.
coordinates_df['Area'].unique()

In [None]:
# Left-join the coordinates onto the rent records by 'Area'; every rent row is
# kept, and rows whose area has no match get missing coordinate values.
merged_df = Rents.merge(coordinates_df, how='left', on='Area')


In [None]:
# Show the distinct area names present after the merge.
merged_df['Area'].unique()

In [None]:
# Print a summary of the merged frame (columns, dtypes, non-null counts).
merged_df.info()

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt

# Convert merged_df (not coordinates_df) to a GeoDataFrame: one point per row,
# built from its 'lon' / 'lat' columns. The CRS is declared as EPSG:4326 so
# this frame is consistent with the GeoDataFrames built in the later map cells.
geometry = gpd.points_from_xy(merged_df['lon'], merged_df['lat'])
gdf = gpd.GeoDataFrame(merged_df, geometry=geometry, crs="EPSG:4326")

# Plot the points on their own (no basemap)
fig, ax = plt.subplots(figsize=(10, 10))
gdf.plot(ax=ax, markersize=10, color='red', alpha=0.6)
ax.set_title("Mapped Areas")
plt.show()


In [None]:

# Natural Earth 1:110m country polygons, used only as a backdrop
world = gpd.read_file(
    "https://github.com/nvkelso/natural-earth-vector/raw/master/geojson/ne_110m_admin_0_countries.geojson"
)

# One EPSG:4326 point per row of merged_df, taken from its lon/lat columns
gdf = gpd.GeoDataFrame(
    merged_df,
    crs="EPSG:4326",
    geometry=gpd.points_from_xy(merged_df['lon'], merged_df['lat']),
)

# Draw the countries first, then the points on top of them
fig, ax = plt.subplots(figsize=(10, 10))
world.plot(ax=ax, edgecolor="black", color="lightgrey")
gdf.plot(ax=ax, color="red", markersize=5, alpha=0.6)
ax.set_title("Mapped Areas (Fast GeoPandas Map)")
plt.show()


In [None]:

# Country polygons; keep only the row whose NAME is the UAE as the basemap
world = gpd.read_file(
    "https://github.com/nvkelso/natural-earth-vector/raw/master/geojson/ne_110m_admin_0_countries.geojson"
)
is_uae = world["NAME"] == "United Arab Emirates"
uae = world[is_uae]

# One EPSG:4326 point per row of merged_df
gdf = gpd.GeoDataFrame(
    merged_df,
    crs="EPSG:4326",
    geometry=gpd.points_from_xy(merged_df['lon'], merged_df['lat']),
)

# UAE outline underneath, area points on top
fig, ax = plt.subplots(figsize=(10, 10))
uae.plot(ax=ax, edgecolor="black", color="lightgrey")
gdf.plot(ax=ax, color="red", markersize=10, alpha=0.7)
ax.set_title("Mapped Areas in UAE (GeoPandas Map)")
plt.show()


In [None]:

# Country polygons; keep only the row whose NAME is the UAE as the basemap
world = gpd.read_file(
    "https://github.com/nvkelso/natural-earth-vector/raw/master/geojson/ne_110m_admin_0_countries.geojson"
)
is_uae = world["NAME"] == "United Arab Emirates"
uae = world[is_uae]

# Approximate lon (x) / lat (y) window around Dubai used to crop the axes
dubai_bounds = dict(minx=55.0, maxx=56.0, miny=24.8, maxy=25.5)

# One EPSG:4326 point per row of merged_df
gdf = gpd.GeoDataFrame(
    merged_df,
    crs="EPSG:4326",
    geometry=gpd.points_from_xy(merged_df['lon'], merged_df['lat']),
)

# UAE outline underneath, area points on top
fig, ax = plt.subplots(figsize=(10, 10))
uae.plot(ax=ax, edgecolor="black", color="lightgrey")
gdf.plot(ax=ax, color="red", markersize=20, alpha=0.7)

# Crop the view to the Dubai window
ax.set_xlim(dubai_bounds["minx"], dubai_bounds["maxx"])
ax.set_ylim(dubai_bounds["miny"], dubai_bounds["maxy"])

ax.set_title("Mapped Areas in Dubai (GeoPandas Map)")
plt.show()


In [None]:

# Remove commas and convert 'Annual Amount' to numeric.
# The dtype guard keeps this cell re-runnable: once the column is numeric, the
# `.str` accessor would raise AttributeError on a second execution.
if not pd.api.types.is_numeric_dtype(Rents['Annual Amount']):
    Rents['Annual Amount'] = (
        Rents['Annual Amount'].str.replace(',', '', regex=False).astype(float)
    )

# Find the top 10 most frequent areas
top_areas = Rents['Area'].value_counts().head(10).index

# Filter data for the top areas
filtered_Rents = Rents[Rents['Area'].isin(top_areas)]

# Compute average Annual Amount by area, highest first
area_avg = filtered_Rents.groupby('Area')['Annual Amount'].mean().sort_values(ascending=False)

# Merge with coordinates (assuming coordinates_df has 'Area', 'lat', 'lon')
top_areas_df = pd.DataFrame({'Area': area_avg.index, 'Average Annual Rent': area_avg.values})
top_areas_geo = top_areas_df.merge(coordinates_df, on="Area", how="left")

# Areas without a coordinate match cannot be placed on the map. Drop them
# before building geometries so that markers, marker sizes and labels all
# refer to the same rows (the later map cells drop these rows as well).
top_areas_geo = top_areas_geo.dropna(subset=['lon', 'lat'])

# Convert to GeoDataFrame
gdf = gpd.GeoDataFrame(
    top_areas_geo,
    geometry=gpd.points_from_xy(top_areas_geo['lon'], top_areas_geo['lat']),
    crs="EPSG:4326"
)

# Load country polygons and keep only the UAE as the basemap
world = gpd.read_file("https://github.com/nvkelso/natural-earth-vector/raw/master/geojson/ne_110m_admin_0_countries.geojson")
uae = world[world["NAME"] == "United Arab Emirates"]

# Define Dubai bounding box (approximate lon/lat window)
dubai_bounds = {
    "minx": 55.0,
    "maxx": 56.0,
    "miny": 24.8,
    "maxy": 25.5
}

# Plot the map; marker area scales with the average rent
fig, ax = plt.subplots(figsize=(10, 10))
uae.plot(ax=ax, color="lightgrey", edgecolor="black")  # UAE map
gdf.plot(ax=ax, markersize=gdf['Average Annual Rent'] / 5000, color="red", alpha=0.7)  # Scaled markers

# Label the top areas
for x, y, label in zip(gdf.geometry.x, gdf.geometry.y, gdf["Area"]):
    ax.text(x, y, label, fontsize=8, ha="right", color="black")

# Set limits to zoom into Dubai
ax.set_xlim(dubai_bounds["minx"], dubai_bounds["maxx"])
ax.set_ylim(dubai_bounds["miny"], dubai_bounds["maxy"])

ax.set_title("Average Annual Rent Amount by Top 10 Areas (Dubai)")
plt.show()


In [None]:

# 'Annual Amount' is expected to be numeric already (it is converted in the
# top-10 cell earlier in this notebook).

# Find the top 20 most frequent areas
top_areas = Rents['Area'].value_counts().head(20).index

# Filter data for the top areas
filtered_Rents = Rents[Rents['Area'].isin(top_areas)]

# Compute average Annual Amount by area, highest first
area_avg = filtered_Rents.groupby('Area')['Annual Amount'].mean().sort_values(ascending=False)

# Merge with coordinates (assuming coordinates_df has 'Area', 'lat', 'lon')
top_areas_df = pd.DataFrame({'Area': area_avg.index, 'Average Annual Rent': area_avg.values})
top_areas_geo = top_areas_df.merge(coordinates_df, on="Area", how="left")

# Areas without a coordinate match cannot be placed on the map. Drop them
# before building geometries so that markers, marker sizes and labels all
# refer to the same rows (the later map cells drop these rows as well).
top_areas_geo = top_areas_geo.dropna(subset=['lon', 'lat'])

# Convert to GeoDataFrame
gdf = gpd.GeoDataFrame(
    top_areas_geo,
    geometry=gpd.points_from_xy(top_areas_geo['lon'], top_areas_geo['lat']),
    crs="EPSG:4326"
)

# Load country polygons and keep only the UAE as the basemap
world = gpd.read_file("https://github.com/nvkelso/natural-earth-vector/raw/master/geojson/ne_110m_admin_0_countries.geojson")
uae = world[world["NAME"] == "United Arab Emirates"]

# Define Dubai bounding box (approximate lon/lat window)
dubai_bounds = {
    "minx": 55.0,
    "maxx": 56.0,
    "miny": 24.8,
    "maxy": 25.5
}

# Plot the map; marker area scales with the average rent
fig, ax = plt.subplots(figsize=(10, 10))
uae.plot(ax=ax, color="lightgrey", edgecolor="black")  # UAE map
gdf.plot(ax=ax, markersize=gdf['Average Annual Rent'] / 5000, color="red", alpha=0.7)  # Scaled markers

# Label the top areas
for x, y, label in zip(gdf.geometry.x, gdf.geometry.y, gdf["Area"]):
    ax.text(x, y, label, fontsize=8, ha="right", color="black")

# Set limits to zoom into Dubai
ax.set_xlim(dubai_bounds["minx"], dubai_bounds["maxx"])
ax.set_ylim(dubai_bounds["miny"], dubai_bounds["maxy"])

ax.set_title("Average Annual Rent Amount by Top 20 Areas (Dubai)")
plt.show()


In [None]:

# 'Annual Amount' is expected to be numeric already (it is converted in the
# top-10 cell earlier in this notebook).

# The 15 areas with the most rent records
top_areas = Rents['Area'].value_counts().head(15).index

# Keep only the records belonging to those areas
in_top_areas = Rents['Area'].isin(top_areas)
filtered_Rents = Rents[in_top_areas]

# Mean annual rent per area, highest first
area_avg = (
    filtered_Rents
    .groupby('Area')['Annual Amount']
    .mean()
    .sort_values(ascending=False)
)

# Attach coordinates (coordinates_df is assumed to provide 'Area', 'lat', 'lon')
top_areas_df = pd.DataFrame({'Area': area_avg.index, 'Average Annual Rent': area_avg.values})
top_areas_geo = pd.merge(top_areas_df, coordinates_df, how="left", on="Area")

# Build EPSG:4326 points, then discard areas that had no coordinate match
gdf = gpd.GeoDataFrame(
    top_areas_geo,
    crs="EPSG:4326",
    geometry=gpd.points_from_xy(top_areas_geo['lon'], top_areas_geo['lat']),
)
gdf = gdf.dropna(subset=['lon', 'lat'])

# Country polygons; only the UAE row is used as the basemap
world = gpd.read_file(
    "https://github.com/nvkelso/natural-earth-vector/raw/master/geojson/ne_110m_admin_0_countries.geojson"
)
uae = world[world["NAME"] == "United Arab Emirates"]

# Approximate lon (x) / lat (y) window around Dubai
dubai_bounds = dict(minx=55.0, maxx=56.0, miny=24.8, maxy=25.5)

# Basemap first, then markers whose size scales with the average rent
fig, ax = plt.subplots(figsize=(10, 10))
uae.plot(ax=ax, edgecolor="black", color="lightgrey")
gdf.plot(ax=ax, color="red", alpha=0.7, markersize=gdf['Average Annual Rent'] / 5000)

# Write each area name at its marker, skipping any point with a missing coordinate
for x, y, label in zip(gdf.geometry.x, gdf.geometry.y, gdf["Area"]):
    if pd.isna(x) or pd.isna(y):
        continue
    ax.text(x, y, label, fontsize=6, ha="left", color="black")

# Crop the view to the Dubai window
ax.set_xlim(dubai_bounds["minx"], dubai_bounds["maxx"])
ax.set_ylim(dubai_bounds["miny"], dubai_bounds["maxy"])

ax.set_title("Average Annual Rent Amount by Top 15 Areas (Dubai)")
plt.show()


In [None]:

# 'Annual Amount' is expected to be numeric already (it is converted in the
# top-10 cell earlier in this notebook).

# Find the top 20 most frequent areas
top_areas = Rents['Area'].value_counts().head(20).index

# Filter data for the top areas
filtered_Rents = Rents[Rents['Area'].isin(top_areas)]

# Compute average Annual Amount by area, highest first
area_avg = filtered_Rents.groupby('Area')['Annual Amount'].mean().sort_values(ascending=False)

# Merge with coordinates (assuming coordinates_df has 'Area', 'lat', 'lon')
top_areas_df = pd.DataFrame({'Area': area_avg.index, 'Average Annual Rent': area_avg.values})
top_areas_geo = top_areas_df.merge(coordinates_df, on="Area", how="left")

# Convert to GeoDataFrame
gdf = gpd.GeoDataFrame(
    top_areas_geo,
    geometry=gpd.points_from_xy(top_areas_geo['lon'], top_areas_geo['lat']),
    crs="EPSG:4326"
)

# Drop rows with invalid or missing coordinates
gdf = gdf.dropna(subset=['lon', 'lat'])

# Load country polygons and keep only the UAE as the basemap
world = gpd.read_file("https://github.com/nvkelso/natural-earth-vector/raw/master/geojson/ne_110m_admin_0_countries.geojson")
uae = world[world["NAME"] == "United Arab Emirates"]

# Define Dubai bounding box (approximate lon/lat window)
dubai_bounds = {
    "minx": 55.0,
    "maxx": 56.0,
    "miny": 24.8,
    "maxy": 25.5
}

# Plot the map; marker area scales with the average rent
fig, ax = plt.subplots(figsize=(10, 10))
uae.plot(ax=ax, color="lightgrey", edgecolor="black")  # UAE map
gdf.plot(ax=ax, markersize=gdf['Average Annual Rent'] / 5000, color="red", alpha=0.7)  # Scaled markers

# Draw each label slightly up and to the right of its marker, and remember
# the marker position together with the Text object for the adjustment pass.
texts = []
for x, y, label in zip(gdf.geometry.x, gdf.geometry.y, gdf["Area"]):
    if pd.notna(x) and pd.notna(y):  # Ensure coordinates are valid
        text = ax.text(x + 0.01, y + 0.01, label, fontsize=6, ha="left", color="black", weight='bold', alpha=0.7)
        texts.append((x, y, text))

# Nudge labels whose markers lie within 0.02 (in lon/lat units) of another marker.
# The horizontal and vertical shifts are applied in ONE set_position call:
# set_position replaces the whole (x, y) pair, so issuing the two shifts as
# separate calls would silently discard the horizontal one.
for i, (x1, y1, text1) in enumerate(texts):
    for j, (x2, y2, _other) in enumerate(texts):
        if i == j:  # Don't compare a label with itself
            continue
        distance = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
        if distance < 0.02:  # Threshold for overlap
            dx = 0.01 if x1 < x2 else -0.01
            dy = 0.01 if y1 < y2 else -0.01
            text1.set_position((x1 + dx, y1 + dy))

# Set limits to zoom into Dubai
ax.set_xlim(dubai_bounds["minx"], dubai_bounds["maxx"])
ax.set_ylim(dubai_bounds["miny"], dubai_bounds["maxy"])

ax.set_title("Average Annual Rent Amount by Top 20 Areas (Dubai)")
plt.show()


In [None]:

# 'Annual Amount' is expected to be numeric already (it is converted in the
# top-10 cell earlier in this notebook).

# Find the top 20 most frequent areas
top_areas = Rents['Area'].value_counts().head(20).index

# Filter data for the top areas
filtered_Rents = Rents[Rents['Area'].isin(top_areas)]

# Compute average Annual Amount by area, highest first
area_avg = filtered_Rents.groupby('Area')['Annual Amount'].mean().sort_values(ascending=False)

# Merge with coordinates (assuming coordinates_df has 'Area', 'lat', 'lon')
top_areas_df = pd.DataFrame({'Area': area_avg.index, 'Average Annual Rent': area_avg.values})
top_areas_geo = top_areas_df.merge(coordinates_df, on="Area", how="left")

# Convert to GeoDataFrame
gdf = gpd.GeoDataFrame(
    top_areas_geo,
    geometry=gpd.points_from_xy(top_areas_geo['lon'], top_areas_geo['lat']),
    crs="EPSG:4326"
)

# Drop rows with invalid or missing coordinates
gdf = gdf.dropna(subset=['lon', 'lat'])

# Load country polygons and keep only the UAE as the basemap
world = gpd.read_file("https://github.com/nvkelso/natural-earth-vector/raw/master/geojson/ne_110m_admin_0_countries.geojson")
uae = world[world["NAME"] == "United Arab Emirates"]

# Define Dubai bounding box (approximate lon/lat window)
dubai_bounds = {
    "minx": 55.0,
    "maxx": 56.0,
    "miny": 24.8,
    "maxy": 25.5
}

# Plot the map; marker area scales with the average rent
fig, ax = plt.subplots(figsize=(10, 10))
uae.plot(ax=ax, color="lightgrey", edgecolor="black")  # UAE map
gdf.plot(ax=ax, markersize=gdf['Average Annual Rent'] / 5000, color="red", alpha=0.7)  # Scaled markers

# Draw each label slightly up and to the right of its marker, and remember
# the marker position together with the Text object for the adjustment pass.
texts = []
for x, y, label in zip(gdf.geometry.x, gdf.geometry.y, gdf["Area"]):
    if pd.notna(x) and pd.notna(y):  # Ensure coordinates are valid
        text = ax.text(x + 0.01, y + 0.01, label, fontsize=6, ha="left", color="black", weight='bold', alpha=0.7)
        texts.append((x, y, text))

# Nudge labels whose markers lie within 0.02 (in lon/lat units) of another marker.
# The horizontal and vertical shifts are applied in ONE set_position call:
# set_position replaces the whole (x, y) pair, so issuing the two shifts as
# separate calls would silently discard the horizontal one.
for i, (x1, y1, text1) in enumerate(texts):
    for j, (x2, y2, _other) in enumerate(texts):
        if i == j:  # Don't compare a label with itself
            continue
        distance = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
        if distance < 0.02:  # Threshold for overlap
            dx = 0.015 if x1 < x2 else -0.015
            dy = 0.015 if y1 < y2 else -0.015
            text1.set_position((x1 + dx, y1 + dy))

# Set limits to zoom into Dubai
ax.set_xlim(dubai_bounds["minx"], dubai_bounds["maxx"])
ax.set_ylim(dubai_bounds["miny"], dubai_bounds["maxy"])

ax.set_title("Average Annual Rent Amount by Top 20 Areas (Dubai)")
plt.show()


In [None]:

# 'Annual Amount' is expected to be numeric already (it is converted in the
# top-10 cell earlier in this notebook).

# Get the top 10 most frequent Areas
top_areas = Rents['Area'].value_counts().nlargest(10).index

# Filter the data to include only the top 10 areas
filtered_rents = Rents[Rents['Area'].isin(top_areas)]

# Mean Annual Amount per (Area, Version), with one column per Version.
# Combinations with no records are left as NaN rather than filled with 0:
# a zero would be read as "rent of 0", producing -100% / infinite percentage
# changes below and dragging down the per-area average used for the map.
area_version_avg = filtered_rents.groupby(['Area', 'Version'])['Annual Amount'].mean().unstack()

# Plot the Average Annual Amount by Area and Version (Top 10 Areas)
area_version_avg.plot(kind='bar', figsize=(12, 6), stacked=False, colormap='Set1')
plt.title("Average Annual Amount by Area and Version (Top 10 Areas)")
plt.ylabel("Average Annual Amount")
plt.xticks(rotation=45)
plt.xlabel("Area")
plt.legend(title="Version", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

# Box Plot for Annual Amount Distribution by Area and Version (Top 10 Areas)
plt.figure(figsize=(14, 7))
sns.boxplot(x='Area', y='Annual Amount', hue='Version', data=filtered_rents, palette='muted')
plt.title("Annual Amount Distribution by Area and Version (Top 10 Areas)")
plt.xticks(rotation=45)
plt.show()

# Percentage change in the average between consecutive Version columns.
# fill_method=None stops pandas from forward-filling a missing Version with
# the previous column's value before computing the change.
area_version_change = area_version_avg.pct_change(axis='columns', fill_method=None) * 100

# Plot Percentage Change in Annual Amount for Top 10 Areas and Versions
area_version_change.plot(kind='bar', figsize=(12, 6), colormap='coolwarm')
plt.title("Percentage Change in Average Annual Amount by Area and Version (Top 10 Areas)")
plt.ylabel("Percentage Change (%)")
plt.xlabel("Area")
plt.xticks(rotation=45)
plt.legend(title="Version", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

# Per-area figure for the map: the mean of the per-Version averages
# (missing Versions are skipped by mean()).
# Merge with coordinates (assuming coordinates_df has 'Area', 'lat', 'lon')
top_areas_df = pd.DataFrame({'Area': area_version_avg.index, 'Average Annual Rent': area_version_avg.mean(axis=1).values})
top_areas_geo = top_areas_df.merge(coordinates_df, on="Area", how="left")

# Convert to GeoDataFrame
gdf = gpd.GeoDataFrame(
    top_areas_geo,
    geometry=gpd.points_from_xy(top_areas_geo['lon'], top_areas_geo['lat']),
    crs="EPSG:4326"
)

# Drop rows with invalid or missing coordinates
gdf = gdf.dropna(subset=['lon', 'lat'])

# Load country polygons and keep only the UAE as the basemap
world = gpd.read_file("https://github.com/nvkelso/natural-earth-vector/raw/master/geojson/ne_110m_admin_0_countries.geojson")
uae = world[world["NAME"] == "United Arab Emirates"]

# Define Dubai bounding box (approximate lon/lat window)
dubai_bounds = {
    "minx": 55.0,
    "maxx": 56.0,
    "miny": 24.8,
    "maxy": 25.5
}

# Plot the map; marker area scales with the average rent
fig, ax = plt.subplots(figsize=(10, 10))
uae.plot(ax=ax, color="lightgrey", edgecolor="black")  # UAE map
gdf.plot(ax=ax, markersize=gdf['Average Annual Rent'] / 5000, color="red", alpha=0.7)  # Scaled markers

# Draw each label slightly up and to the right of its marker, and remember
# the marker position together with the Text object for the adjustment pass.
texts = []
for x, y, label in zip(gdf.geometry.x, gdf.geometry.y, gdf["Area"]):
    if pd.notna(x) and pd.notna(y):  # Ensure coordinates are valid
        text = ax.text(x + 0.01, y + 0.01, label, fontsize=6, ha="left", color="black", weight='bold', alpha=0.7)
        texts.append((x, y, text))

# Nudge labels whose markers lie within 0.02 (in lon/lat units) of another marker.
# The horizontal and vertical shifts are applied in ONE set_position call:
# set_position replaces the whole (x, y) pair, so issuing the two shifts as
# separate calls would silently discard the horizontal one.
for i, (x1, y1, text1) in enumerate(texts):
    for j, (x2, y2, _other) in enumerate(texts):
        if i == j:  # Don't compare a label with itself
            continue
        distance = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
        if distance < 0.02:  # Threshold for overlap
            dx = 0.015 if x1 < x2 else -0.015
            dy = 0.015 if y1 < y2 else -0.015
            text1.set_position((x1 + dx, y1 + dy))

# Set limits to zoom into Dubai
ax.set_xlim(dubai_bounds["minx"], dubai_bounds["maxx"])
ax.set_ylim(dubai_bounds["miny"], dubai_bounds["maxy"])

ax.set_title("Average Annual Rent Amount by Top 10 Areas (Dubai)")
plt.show()
