In [202]:
import pandas as pd
import numpy as np
import re
import plotly.graph_objects as go
import plotly.express as px
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Initialise Plotly's notebook integration so figures render inline.
# NOTE(review): connected=True presumably loads plotly.js from a CDN rather than
# embedding it in the notebook — confirm against the plotly.offline docs.
init_notebook_mode(connected=True)
cf.go_offline() # Switch cufflinks to offline mode so Plotly is used locally

# Raise pandas' display limits to 150 columns / 150 rows before output is truncated.
pd.set_option('display.max_columns',150)
pd.set_option('display.max_rows',150)

In [224]:
# Load the listings CSV into `df`, the frame every later cell reads from
# (the file name suggests merged rental listings for Hyderabad).
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which looks like a
# saved index — consider index_col=0 if nothing downstream relies on it; confirm.
df = pd.read_csv('merged_rental_hyderabad.csv')
# Show the first rows as the cell output.
df.head()

Unnamed: 0,bedrooms,locality,property_type,ispaiduser,user_type,other,description,bathrooms,date_posted,builder,project,floor,furnish_status,price,area_sqft,marital_status_pref,availability,isprice_benford,isarea_benford,loc_count
0,3.0,"Patrika Nagar, Hitech City",Builder Floor Apartment,Y,Agent,floor 1 out of 5 Floors furnishing Furnished...,3BHK Property Available For Rent,3.0,2021-09-18,,,1.0,Furnished,22000.0,2000.0,Bachelors/Family,Immediate,False,False,8.0
1,3.0,"Tellapur, Outer Ring Road",Villa,Y,Agent,furnishing Semi-Furnished tenants preferred ...,it is a gated community villa project providin...,3.0,2021-09-19,,,,Semi-Furnished,40000.0,,Bachelors/Family,Immediate,False,False,42.0
2,4.0,"Kokapet, Outer Ring Road",Villa,Y,Agent,furnishing Furnished tenants preferred Bache...,Prime location.Celebritis living place.Excelle...,5.0,2021-09-17,,,,Furnished,160000.0,4300.0,Bachelors/Family,Immediate,True,False,98.0
3,3.0,,Apartment,Y,Agent,floor 1 out of 26 Floors furnishing Semi-Fur...,"3 BHK, Multistorey Apartment is available for ...",3.0,2021-09-18,,,1.0,Semi-Furnished,31000.0,1835.0,Bachelors/Family,Immediate,False,True,
4,3.0,Gachibowli,Apartment,Y,Agent,floor 1 out of 10 Floors furnishing Unfurnis...,In the Centre of Prime location. Proximity to ...,5.0,2021-09-18,Trendset Builders,Trendset Winz,1.0,Unfurnished,55000.0,2800.0,Bachelors/Family,Immediate,False,False,465.0


In [244]:
# Number of listings per property type, in value_counts order (most frequent first).
prop_breakup = dict(df['property_type'].value_counts())
prop_df = pd.DataFrame({
    'prop_type': list(prop_breakup.keys()),
    'count': list(prop_breakup.values()),
})

# Vertical bar chart: one bar per property type, coloured and labelled by its count.
fig = px.bar(
    prop_df,
    x='prop_type',
    y='count',
    text='count',
    labels={'count': 'Count'},
    color='count',
    color_continuous_scale='viridis',
)
fig.update_traces(textposition='outside')

fig.update_layout(
    title=dict(
        text="<b>Sample Size: Property Types</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.47,
        y=0.95,
        xanchor='center',
        yanchor='top',
    ),
    yaxis=dict(categoryorder='total ascending'),
)

fig.update_xaxes(
    color='teal',
    title=dict(
        text='Property Type',
        font=dict(family='Open Sans', size=20, color='maroon'),
        standoff=15,
    ),
    gridcolor='lightblue',
    tickmode='auto',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_yaxes(
    color='Teal',
    title=dict(
        text='Count',
        font=dict(family='Droid Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickfont=dict(family='Arial'),
    nticks=20,
    gridcolor='lightblue',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

In [207]:
# Listings per locality, most frequent first.
locality_counts = df['locality'].value_counts()
locations = dict(locality_counts)
locations_df = locality_counts.rename_axis('locality').reset_index(name='count')

# Horizontal bars for the 20 localities with the most listings.
fig = px.bar(
    locations_df.head(20),
    y='locality',
    x='count',
    text='count',
    orientation='h',
    labels={'count': 'Count'},
    color='count',
    color_continuous_scale='Turbo',
)
fig.update_traces(textposition='outside')

fig.update_layout(
    title=dict(
        text="<b>Most Popular locations (by Listings Count)</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.6,
        y=0.95,
        xanchor='center',
        yanchor='top',
    ),
    # Order the locality axis by bar total so the largest bar ends up on top.
    yaxis=dict(categoryorder='total ascending'),
)

fig.update_xaxes(
    color='teal',
    title=dict(
        text='Count',
        font=dict(family='Open Sans', size=20, color='maroon'),
        standoff=15,
    ),
    gridcolor='lightblue',
    tickmode='auto',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_yaxes(
    color='Teal',
    title=dict(
        text='Location',
        font=dict(family='Droid Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickfont=dict(family='Arial'),
    nticks=20,
    gridcolor='lightblue',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

In [209]:
# Listings per builder, most frequent first.
builder_counts = df['builder'].value_counts()
builders = dict(builder_counts)
builders_df = builder_counts.rename_axis('builder').reset_index(name='count')

# Horizontal bars for the 20 builders with the most listings.
fig = px.bar(
    builders_df.head(20),
    y='builder',
    x='count',
    text='count',
    orientation='h',
    labels={'count': 'Count'},
    color='count',
    color_continuous_scale='viridis',
)
fig.update_traces(textposition='outside')

fig.update_layout(
    title=dict(
        text="<b>Top Builders (Listings Count)</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.6,
        y=0.95,
        xanchor='center',
        yanchor='top',
    ),
    # Order the builder axis by bar total so the largest bar ends up on top.
    yaxis=dict(categoryorder='total ascending'),
)

fig.update_xaxes(
    color='teal',
    title=dict(
        text='Count',
        font=dict(family='Open Sans', size=20, color='maroon'),
        standoff=15,
    ),
    gridcolor='lightblue',
    tickmode='auto',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_yaxes(
    color='Teal',
    title=dict(
        text='Builder',
        font=dict(family='Droid Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickfont=dict(family='Arial'),
    nticks=20,
    gridcolor='lightblue',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

In [210]:
# Top projects by number of listings.
#
# Each listing is labelled "<project>:<locality>" so that identically named projects
# in different localities are counted separately. The label is built as a standalone
# Series read straight from `df`: the previous version referenced an undefined
# `project_df` (NameError on a fresh kernel) and, because `projects_df` was only an
# alias of `df`, wrote a `pro_loc` column onto the shared frame as a side effect.
# Rows missing either the project or the locality produce a missing label and are
# therefore not counted by value_counts().
pro_loc = df['project'] + ':' + df['locality']
projects = dict(pro_loc.value_counts())
projects_df = pd.DataFrame({
    'project': list(projects.keys()),
    'count': list(projects.values()),
})

# Horizontal bars for the 20 most-listed project/locality pairs.
fig = px.bar(projects_df[0:20],y='project', x='count', text='count',orientation='h',
            labels={'count':'Count'}, color='count') 

fig.update_traces(textposition='outside')
fig.update_layout(title_text="<b>Top Projects by (Listings Count)</b>",
                 title_font_size=25,
                 title_font_color='green',
                 title_font_family='Titillium Web',
                 title_x=0.6,
                 title_y=0.95,
                 title_xanchor='center',
                 title_yanchor='top',
                 # Order the project axis by bar total so the largest bar ends up on top.
                 yaxis={'categoryorder':'total ascending'}
                 )

fig.update_xaxes(
        color='teal',
        title_text='Count',
        title_font_family='Open Sans',
        title_font_size=20,
        title_font_color='maroon',
        title_standoff = 15,
        gridcolor='lightblue',
        tickmode='auto',
        linecolor='red',
        linewidth=3,
        mirror=True)

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_yaxes(
        color='Teal',
        title_text='Project',
        title_font_family='Droid Sans',
        title_font_size=20,
        title_font_color='maroon',
        title_standoff = 15,
        tickfont_family='Arial',
        nticks = 20,
        gridcolor='lightblue',
        linecolor='red',
        linewidth=3,
        mirror = True)

In [213]:
# Share of listings per furnishing status, drawn as a donut chart.
furnished = dict(df['furnish_status'].value_counts())

fig = px.pie(
    names=list(furnished.keys()),
    values=list(furnished.values()),
    hole=0.3,
    color_discrete_sequence=px.colors.sequential.Turbo,
)
# Print percentage and label inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_layout(
    title=dict(
        text="<b>Furnished Status (All Listings)</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.45,
        y=0.95,
        xanchor='center',
        yanchor='top',
    ),
)

In [212]:
# Share of listings per property type, drawn as a donut chart.
prop = dict(df['property_type'].value_counts())

fig = px.pie(
    names=list(prop.keys()),
    values=list(prop.values()),
    hole=0.3,
    color_discrete_sequence=px.colors.sequential.Hot,
)
# Print percentage and label outside each slice.
fig.update_traces(textinfo='percent+label', textposition='outside')

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_layout(
    title=dict(
        text="<b>Property Type (All Listings)</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.5,
        y=0.95,
        xanchor='center',
        yanchor='top',
    ),
)

In [215]:
# Listings per floor number; keep the 20 most common floors.
floor = dict(df['floor'].value_counts())
top_floor_numbers = list(floor.keys())[:20]
top_floor_counts = list(floor.values())[:20]

# Single horizontal bar trace, each bar labelled with its own count.
fig = go.Figure()
fig.add_trace(
    go.Bar(
        y=top_floor_numbers,
        x=top_floor_counts,
        orientation='h',
        marker=dict(color='blue'),
        text=top_floor_counts,
    )
)
fig.update_traces(textposition='outside')

fig.update_layout(
    title=dict(
        text="<b>Top 20 Floors (by Count)</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.57,
        y=0.90,
        xanchor='center',
        yanchor='top',
    ),
    yaxis=dict(categoryorder='total ascending'),
)

fig.update_xaxes(
    color='teal',
    title=dict(
        text='Count',
        font=dict(family='Open Sans', size=20, color='maroon'),
        standoff=15,
    ),
    gridcolor='lightblue',
    tickmode='auto',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_yaxes(
    color='Teal',
    title=dict(
        text='Floor Number',
        font=dict(family='Droid Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickfont=dict(family='Arial'),
    gridcolor='lightblue',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

In [216]:
# Share of listings per user type, drawn as a full pie (no hole).
users = dict(df['user_type'].value_counts())

fig = px.pie(
    names=list(users.keys()),
    values=list(users.values()),
    color_discrete_sequence=px.colors.sequential.Bluered,
)
# Print percentage and label inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_layout(
    title=dict(
        text="<b>User Type</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.45,
        y=0.95,
        xanchor='center',
        yanchor='top',
    ),
)


In [218]:
# Mean listing price per property type, truncated to whole numbers for the bar labels.
prop_grp = df.groupby('property_type')
mean_price_by_type = prop_grp['price'].mean()
prop_rents = dict(mean_price_by_type)
proprents_df = mean_price_by_type.reset_index(name='mean_rent')
proprents_df['mean_rent'] = proprents_df['mean_rent'].astype(int)

# Vertical bar chart: one bar per property type, coloured and labelled by mean rent.
fig = px.bar(
    proprents_df,
    x='property_type',
    y='mean_rent',
    text='mean_rent',
    labels={'mean_rent': 'Average Rent'},
    color='mean_rent',
    color_continuous_scale='viridis',
)
fig.update_traces(textposition='outside')

fig.update_layout(
    title=dict(
        text="<b>Average Rents (by Property Type)</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.47,
        y=0.95,
        xanchor='center',
        yanchor='top',
    ),
    yaxis=dict(categoryorder='total ascending'),
)

fig.update_xaxes(
    color='teal',
    title=dict(
        text='Property Type',
        font=dict(family='Open Sans', size=20, color='maroon'),
        standoff=15,
    ),
    gridcolor='lightblue',
    tickmode='auto',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_yaxes(
    color='Teal',
    title=dict(
        text='Average Rent (INR)',
        font=dict(family='Droid Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickfont=dict(family='Arial'),
    nticks=20,
    gridcolor='lightblue',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

In [219]:
# Average rent by furnishing status.
#
# Groups listings by `furnish_status`, takes the mean `price` per group and truncates
# it to a whole number for the bar labels. The x-axis title used to read
# 'Property Type' (left over from the property-type chart); it now names the
# quantity actually on the axis.
fur_grp = df.groupby('furnish_status')
fur_rents = fur_grp['price'].mean()
fur_rents = dict(fur_rents)
furrents_df = pd.DataFrame({
    'furnish_type': list(fur_rents.keys()),
    'mean_rent': list(fur_rents.values()),
})
furrents_df['mean_rent'] = furrents_df['mean_rent'].astype(int)

fig = px.bar(furrents_df,x='furnish_type', y='mean_rent', text='mean_rent',
            labels={'mean_rent':'Average Rent'}, color='mean_rent', color_continuous_scale = 'viridis') 

fig.update_traces(textposition='outside')
fig.update_layout(title_text="<b>Average Rents (by Furnished Status)</b>",
                 title_font_size=25,
                 title_font_color='green',
                 title_font_family='Titillium Web',
                 title_x=0.47,
                 title_y=0.95,
                 title_xanchor='center',
                 title_yanchor='top',
                 yaxis={'categoryorder':'total ascending'}
                 )

fig.update_xaxes(
        color='teal',
        # The categories on this axis are furnishing statuses, not property types.
        title_text='Furnished Status',
        title_font_family='Open Sans',
        title_font_size=20,
        title_font_color='maroon',
        title_standoff = 15,
        gridcolor='lightblue',
        tickmode='auto',
        linecolor='red',
        linewidth=3,
        mirror=True)

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_yaxes(
        color='Teal',
        title_text='Average Rent (INR)',
        title_font_family='Droid Sans',
        title_font_size=20,
        title_font_color='maroon',
        title_standoff = 15,
        tickfont_family='Arial',
        nticks = 20,
        gridcolor='lightblue',
        linecolor='red',
        linewidth=3,
        mirror = True)

In [220]:
# Monthly trend of the average listing price.
#
# `date_posted` is parsed to datetimes in place on `df` (re-running is harmless).
# Bar labels and the row filter are both driven from `trend_months`, so they cannot
# drift apart. The filter matches on year AND month: the earlier version matched on
# the month number alone, so listings from any other year would have been averaged
# into bars labelled 2021.
df['date_posted'] = pd.to_datetime(df['date_posted'], format='%Y-%m-%d')

# (bar label, year, month) for every bar, in display order.
trend_months = [
    ('July 2021', 2021, 7),
    ('August 2021', 2021, 8),
    ('September 2021', 2021, 9),
]

posted = df['date_posted'].dt
mean_names = [label for label, _, _ in trend_months]
# int() truncates the mean for display; it raises ValueError if a month has no
# priced listings (mean is NaN), which is also how the original loop behaved.
mean_list = [
    int(df.loc[(posted.year == year) & (posted.month == month), 'price'].mean())
    for _, year, month in trend_months
]

fig = go.Figure()

fig.add_trace(go.Bar(
    x= mean_names,
    y= mean_list,
    name='Average Rent Value',
    marker_color='orange',
    text=mean_list)
             )

fig.update_traces(textposition='outside')

fig.update_layout(title_text="<b>Monthly Trend: Average Rents</b>",
                 title_font_size=25,
                 title_font_color='green',
                 title_font_family='Titillium Web',
                 title_x=0.5,
                 title_y=0.90,
                 title_xanchor='center',
                 title_yanchor='top'
                 )

fig.update_xaxes(
        color='teal',
        title_text='Month',
        title_font_family='Open Sans',
        title_font_size=20,
        title_font_color='maroon',
        title_standoff = 15,
        tickmode='auto',
        nticks=48,
        linecolor='red',
        linewidth=3,
        mirror=True)

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_yaxes(
        color='Teal',
        title_text='Average Rent (INR)',
        title_font_family='Droid Sans',
        title_font_size=20,
        title_font_color='maroon',
        title_standoff = 15,
        tickfont_family='Arial',
        linecolor='red',
        linewidth=3,
        mirror = True)

In [221]:
# Median rent per floor number, restricted to floors with enough listings.
#
# `floor_df` is now a derived frame: the per-floor listing count is attached with
# assign() instead of being written through an alias of `df`, so the shared `df`
# no longer gains a `floor_count` column as a side effect of running this cell.
# `floor_df` itself still carries `floor_count`, exactly as before.
floor_counts = df.groupby('floor')['floor'].transform('count')
# Keep only floors that appear in at least 10 listings.
floor_df = df.assign(floor_count=floor_counts).loc[floor_counts >= 10]

# Median price per remaining floor, highest first; truncated to ints for the labels.
floor_grp = floor_df.groupby('floor')
median_by_floor = floor_grp['price'].median().sort_values(ascending=False)
floor_rents = dict(median_by_floor)
floorrents_df = median_by_floor.reset_index(name='median_rent')
floorrents_df['median_rent'] = floorrents_df['median_rent'].astype(int)

# Horizontal bars for the 20 floors with the highest median rent.
fig = px.bar(floorrents_df[0:20],y='floor', x='median_rent', text='median_rent',orientation='h',
            labels={'median_rent':'Median Rent'}, color='median_rent', color_continuous_scale = 'Turbo') 

fig.update_traces(textposition='outside')
fig.update_layout(title_text="<b>Median Rents by Floor Number</b>",
                 title_font_size=25,
                 title_font_color='green',
                 title_font_family='Titillium Web',
                 title_x=0.5,
                 title_y=0.95,
                 title_xanchor='center',
                 title_yanchor='top',
                 yaxis={'categoryorder':'total ascending'}
                 )

fig.update_xaxes(
        color='teal',
        title_text='Median Rent (INR)',
        title_font_family='Open Sans',
        title_font_size=20,
        title_font_color='maroon',
        title_standoff = 15,
        gridcolor='lightblue',
        tickmode='auto',
        linecolor='red',
        linewidth=3,
        mirror=True)

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_yaxes(
        color='Teal',
        title_text='Floor Number',
        title_font_family='Droid Sans',
        title_font_size=20,
        title_font_color='maroon',
        title_standoff = 15,
        tickfont_family='Arial',
        nticks = 20,
        gridcolor='lightblue',
        linecolor='red',
        linewidth=3,
        mirror = True)

In [223]:
# Share of listings per preferred tenant category, drawn as a donut chart.
prop = dict(df['marital_status_pref'].value_counts())

fig = px.pie(
    names=list(prop.keys()),
    values=list(prop.values()),
    hole=0.3,
    color_discrete_sequence=px.colors.sequential.Hot,
)
# Print percentage and label outside each slice.
fig.update_traces(textinfo='percent+label', textposition='outside')

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_layout(
    title=dict(
        text="<b>Tenants Preferred (Overall)</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.5,
        y=0.95,
        xanchor='center',
        yanchor='top',
    ),
)

In [238]:
# Median rent per locality, using only rows whose `loc_count` is at least 10.
loc_df = df.loc[df['loc_count'] >= 10]
loc_grp = loc_df.groupby('locality')
loc_rents = dict(loc_grp['price'].median())

# Rebuild `loc_df` as a (locality, median_rent) table with rents truncated to ints,
# ordered from the most expensive locality down.
loc_df = pd.DataFrame({
    'locality': list(loc_rents.keys()),
    'median_rent': [int(rent) for rent in loc_rents.values()],
})
loc_df = loc_df.sort_values(by='median_rent', ascending=False)

# Horizontal bars for the 20 localities with the highest median rent.
fig = px.bar(
    loc_df.head(20),
    y='locality',
    x='median_rent',
    text='median_rent',
    orientation='h',
    labels={'median_rent': 'Median Rent'},
    color='median_rent',
    color_continuous_scale='Turbo',
)
fig.update_traces(textposition='outside')

fig.update_layout(
    title=dict(
        text="<b>Most Expensive Locations (Overall)</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.62,
        y=0.95,
        xanchor='center',
        yanchor='top',
    ),
    yaxis=dict(categoryorder='total ascending'),
)

fig.update_xaxes(
    color='teal',
    title=dict(
        text='Median Rent (INR)',
        font=dict(family='Open Sans', size=20, color='maroon'),
        standoff=15,
    ),
    gridcolor='lightblue',
    tickmode='auto',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_yaxes(
    color='Teal',
    title=dict(
        text='Location',
        font=dict(family='Droid Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickfont=dict(family='Arial'),
    nticks=20,
    gridcolor='lightblue',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

In [241]:
# Mean rent per locality, using only rows whose `loc_count` is at least 10.
loc_df = df.loc[df['loc_count'] >= 10]
loc_grp = loc_df.groupby('locality')
loc_rents = dict(loc_grp['price'].mean())

# Rebuild `loc_df` as a (locality, average_rent) table with rents truncated to ints,
# ordered from the cheapest locality up.
loc_df = pd.DataFrame({
    'locality': list(loc_rents.keys()),
    'average_rent': [int(rent) for rent in loc_rents.values()],
})
loc_df = loc_df.sort_values(by='average_rent')

# Horizontal bars for the 20 localities with the lowest average rent.
fig = px.bar(
    loc_df.head(20),
    y='locality',
    x='average_rent',
    text='average_rent',
    orientation='h',
    labels={'average_rent': 'Average Rent'},
    color='average_rent',
    color_continuous_scale='Turbo',
)
fig.update_traces(textposition='outside')

fig.update_layout(
    title=dict(
        text="<b>Most Affordable Locations (Overall)</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.53,
        y=0.95,
        xanchor='center',
        yanchor='top',
    ),
    yaxis=dict(categoryorder='total descending'),
)

fig.update_xaxes(
    color='teal',
    title=dict(
        text='Average Rent (INR)',
        font=dict(family='Open Sans', size=20, color='maroon'),
        standoff=15,
    ),
    gridcolor='lightblue',
    tickmode='auto',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

# Kept as the final expression so the figure is rendered as the cell output.
fig.update_yaxes(
    color='Teal',
    title=dict(
        text='Location',
        font=dict(family='Droid Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickfont=dict(family='Arial'),
    nticks=20,
    gridcolor='lightblue',
    linecolor='red',
    linewidth=3,
    mirror=True,
)