# Importing the dependencies

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import missingno as msno
import re
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import MultiLabelBinarizer
import ast

# Loading the version 1 cleaned csv files of flats and independent houses,
# plus the raw apartments csv (paths are relative to the working directory)
flats_df = pd.read_csv('Cleaned_datasets/Clean_Flats_V1.csv')
Ih_df = pd.read_csv('Cleaned_datasets/Cleaned_IH_V1.csv')
Appartment_df = pd.read_csv('Raw_data/appartments.csv')

### Loading and concatenating the dataframes

In [None]:
# Give both source frames a fresh 0..n-1 index before stacking them
for source_frame in (flats_df, Ih_df):
    source_frame.reset_index(drop=True, inplace=True)

# Stack flats on top of independent houses under one continuous index
df = pd.concat([flats_df, Ih_df], ignore_index=True)

In [None]:
# Lift the display limits so every row and every column is shown
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# Exploratory data analysis

In [None]:
# Checking shape (rows, columns) of the combined dataframe
df.shape

In [None]:
# Checking features information (column dtypes and non-null counts)
df.info()

In [None]:
# Checking for null values: bar chart of per-column completeness,
# used to spot the columns that have missing values
msno.bar(df, figsize=(15, 3), color="dodgerblue", sort="ascending", fontsize=12)
plt.show()

In [None]:
# Drop exact duplicate rows and report whether any were present
duplicate_count = df.duplicated().sum()
if duplicate_count != 0:
    print("Removed duplicates")
    df.drop_duplicates(inplace=True)
else:
    print("No duplicate values")

In [None]:
# Extracting the sector/locality from the property name and creating a new feature named 'sector'
# (inserted at column position 3): the text after 'in', with 'Gurgaon' removed and whitespace stripped.
# NOTE(review): split('in') splits on every 'in' substring, not only on the word " in ", and only the
# second piece is kept, so names that contain 'in' are truncated. The later 'civil l' -> 'sector 1'
# replacement appears to rely on this, so changing the split means revisiting those mappings too.
df.insert(loc=3,column='sector',value=df['property_name'].str.split('in').str.get(1).str.replace('Gurgaon','').str.strip())

# Lowercasing the values
df['sector'] = df['sector'].str.lower()

In [None]:
# Keep only the rows whose sector appears at least 3 times in the data
sector_frequency = df['sector'].value_counts()
value_counts = sector_frequency[sector_frequency >= 3]
frequent_sector_mask = df['sector'].isin(value_counts.index)
df = df[frequent_sector_mask]

In [None]:
# Ordered (old, new) substring replacements that collapse sector variants
# (letter suffixes, block prefixes, road/phase qualifiers) onto a base sector.
# The order is significant: every rule sees the output of the rules before it.
sector_variant_fixes = [
    ('sector 95a', 'sector 95'),
    ('sector 23a', 'sector 23'),
    ('sector 12a', 'sector 12'),
    ('sector 3a', 'sector 3'),
    ('sector 110 a', 'sector 110'),
    ('patel nagar', 'sector 15'),
    ('a block sector 43', 'sector 43'),
    ('maruti kunj', 'sector 12'),
    ('b block sector 43', 'sector 43'),

    ('sector-33 sohna road', 'sector 33'),
    ('sector 1 manesar', 'manesar'),
    ('sector 4 phase 2', 'sector 4'),
    ('sector 1a manesar', 'manesar'),
    ('c block sector 43', 'sector 43'),
    ('sector 89 a', 'sector 89'),
    ('sector 2 extension', 'sector 2'),
    ('sector 36 sohna road', 'sector 36'),

    ('sector 37d', 'sector 37'),
    ('sector 70a', 'sector 70'),
    # NOTE(review): every other suffix rule maps to its own base sector; '37c' -> '33'
    # looks like a typo for 'sector 37' - confirm before changing.
    ('sector 37c', 'sector 33'),
    ('sector-33 sohna', 'sector 33'),
    ('sector 99a', 'sector 99'),
    ('sector 88a', 'sector 88'),
    ('sector 67a', 'sector 67'),
    ('sector 82a', 'sector 82'),

    ('sector 63a', 'sector 63'),
    ('sector 36a', 'sector 36'),
    ('sector 9a', 'sector 9'),
    ('sector 10a', 'sector 10'),
    ('sector 17b', 'sector 17'),
    ('sector 17a', 'sector 17'),
    ('sector 88b', 'sector 88'),
    ('sector-2 sohna', 'sector 2'),

    ('sector 37a', 'sector 37'),
    ('near sector 1 market', 'sector 1'),
    ('sector-35 sohna', 'sector 35'),
    ('new colony, sector 7', 'sector 7'),
    ('civil l', 'sector 1'),
    ('block g sector-57', 'sector 57'),
    ('anand garden, sector-105', 'sector 105'),
    # Previously replaced 'sector 9b' with itself (a no-op); now folded into
    # 'sector 9' like the other letter-suffixed sectors.
    ('sector 9b', 'sector 9'),
]

for old_value, new_value in sector_variant_fixes:
    # regex=False keeps this a literal substring replacement on every pandas version
    df['sector'] = df['sector'].str.replace(old_value, new_value, regex=False)

Now that the sector values with an a, b or c suffix have been normalised, we need to find the actual sector of each named locality (colonies, roads, DLF phases, etc.) and replace the name with its sector number.

In [None]:
# Ordered (locality text, sector) substring replacements.
# The sequence is kept exactly as authored, repeated rules included, because later
# rules operate on text produced by earlier ones (e.g. 'sohna' -> 'sector 1' followed
# by 'sector 1 road' -> 'sector 1').
locality_to_sector = [
    ('mg road', 'sector 25'),
    ('a block sushant lok phase 1', 'sector 26'),
    ('prem nagar', 'sector 13'),
    ('b block sushant lok phase 1', 'sector 40'),
    ('gandhi nagar', 'sector 11'),
    ('laxmi garden', 'sector 11'),
    ('shakti nagar', 'sector 11'),
    ('vishnu garden', 'sector 105'),

    ('sohna', 'sector 1'),
    ('nirvana country', 'sector 50'),
    ('dlf phase 2', 'sector 25'),
    ('dlf phase 1', 'sector 25'),
    ('dlf phase 3', 'sector 24'),
    ('dlf phase 4', 'sector 24'),
    ('dlf phase 5', 'sector 53'),
    ('palam vihar', 'sector 1'),
    ('laxman vihar', 'sector 3'),
    ('sushant lok phase 1', 'sector 43'),
    ('vishnu garden', 'sector 105'),

    ('sector 3 phase 2', 'sector 3'),
    ('sector 1 road', 'sector 1'),
    ('south city 1', 'sector 1'),
    ('c block sector 43', 'sector 43'),
    ('sector 36 sector 1', 'sector 36'),
    ('sector 1 imt manesar', 'sector 1'),
    ('sector 1a imt manesar', 'sector 1'),
    ('new sector 1', 'sector 1'),
    ('new', 'sector 1'),

    ('imt manesar', 'sector 1'),
    ('suncity', 'sector 54'),
    ('surya vihar', 'sector 21'),
    ('adarsh nagar', 'sector 12'),
    ('valley view estate', 'sector 58'),
    ('bhondsi', 'sector 68'),
    ('dharam colony', 'sector 12'),
    ('shivaji nagar', 'sector 11'),
    ('madanpuri', 'sector 7'),

    ('shivpuri', 'sector 7'),
    ('devilal colony', 'sector 9'),
    ('bhim nagar', 'sector 6'),
    ('ravi nagar', 'sector 9'),
    ('krishna colony', 'sector 7'),
    ('baldev nagar', 'sector 7'),
    ('dharam colony', 'sector 12'),
    ('garhi harsaru', 'sector 93'),
    ('manesar', 'sector 1'),

    ('subhash nagar', 'sector 6'),
    ('dayanand colony', 'sector 6'),
    ('greenwood city', 'sector 45'),
    ('chakkarpur', 'sector 18'),
    ('sushant lok phase 2', 'sector 55'),
    ('saraswati vihar', 'sector 28'),
    ('ansal plaza', 'sector 1'),
    ('arjun nagar', 'sector 8'),
    ('rajiv nagar', 'sector 13'),

    ('jacobpura', 'sector 12'),
    ('jyoti park', 'sector 7'),
    ('ashok vihar', 'sector 3'),
    ('sector 1 colony', 'sector 1'),
    ('surat nagar 1', 'sector 104'),
    ('mianwali colony', 'sector 12'),
    ('dwarka expressway', 'sector 99'),
    ('malibu town', 'sector 47'),
    ('mehrauli  road', 'sector 28'),

    ('sector 1 extension', 'sector 1'),
    ('sector 3 phase 2', 'sector 3'),
    ('sector 3 phase 3 extension', 'sector 3'),
    ('uppals southend', 'sector 49'),
    ('rajendra park', 'sector 105'),
    ('sushant lok phase 3', 'sector 57'),
    ('gwal pahari', 'sector 55'),
]

for locality_text, sector_label in locality_to_sector:
    df['sector'] = df['sector'].str.replace(locality_text, sector_label)

Looking carefully at the `areaWithType` feature, we can see that it mentions 4 different types of area: Plot area, Carpet area, Built up area and Super built up area.

Also, the `Area` feature is inconsistent: in some rows it holds the carpet area, while in other rows it holds some other type of area, so this feature is not very reliable. Instead of relying on `Area`, we will use the `areaWithType` feature, extract all 4 areas from it and store them in new features (the plot area of independent houses is stored in `built_up_area`).

In [None]:
# Peek at 5 random rows to compare the raw 'Area' value with the 'areaWithType' text
df[['Area','areaWithType']].sample(5)

In [None]:
class Extract_featres_Area:
    """
    Helpers that pull individual area figures out of the free-text
    areaWithType feature and bring them to sq.ft.

    The extraction methods return a float, or None when the requested label
    is not present (or the text is not a string, e.g. a missing value).
    """

    # Conversion factors to sq.ft., shared by every method of the class
    SQM_TO_SQFT = 10.7639
    SQYD_TO_SQFT = 9

    # This function extracts the Super Built up area
    def get_super_built_up_area(self,text):
        """Return the number following 'Super Built up area ', or None."""
        if not isinstance(text, str):
            return None
        match = re.search(r'Super Built up area (\d+\.?\d*)', text)
        if match:
            return float(match.group(1))
        return None

    # This function extracts the Built Up area or Carpet area
    def get_area(self,text, area_type):
        """Return the number following '<area_type>:' in text, or None.

        area_type is used as the literal label to look for, e.g.
        'Built Up area' or 'Carpet area'.
        """
        if not isinstance(text, str):
            return None
        match = re.search(area_type + r'\s*:\s*(\d+\.?\d*)', text)
        if match:
            return float(match.group(1))
        return None

    # Function to extract plot area from 'areaWithType' column
    def extract_plot_area(self,area_with_type):
        """Return the number following 'Plot area ', or None."""
        if not isinstance(area_with_type, str):
            return None
        match = re.search(r'Plot area (\d+\.?\d*)', area_with_type)
        if match:
            return float(match.group(1))
        return None

    # This function checks if the area is provided in sq.m. and converts it to sqft if needed
    def convert_to_sqft(self,text, area_value):
        """Return the sq.m. figure that follows area_value in text, converted to sq.ft.

        When text contains '<area_value> (<number> sq.m.)', <number> * 10.7639
        is returned; otherwise area_value is returned unchanged. Missing values
        (None / NaN) are passed through as they are.

        NOTE(review): area_value is rendered the way Python prints it, so a
        float such as 900.0 is searched as '900.0' and does not match text that
        spells the number as '900'. For such rows the value is returned
        unchanged - confirm that this is the intended behaviour.
        """
        if area_value is None or pd.isna(area_value) or not isinstance(text, str):
            return area_value
        # The value and the dots of 'sq.m.' are matched literally, not as regex wildcards
        pattern = r'{} \((\d+\.?\d*) sq\.m\.\)'.format(re.escape(str(area_value)))
        match = re.search(pattern, text)
        if match:
            sq_m_value = float(match.group(1))
            return sq_m_value * self.SQM_TO_SQFT  # conversion factor from sq.m. to sqft
        return area_value


    # This method will convert the built_up_area of indpendent houses to sqft
    def convert_scale(self,row):
        """Rescale row['built_up_area'] to sq.ft. using its ratio to row['Area'].

        A rounded Area / built_up_area ratio of 9 is treated as a value given in
        sq.yd., a ratio of 11 as a value given in sq.m.; any other ratio leaves
        the value untouched. Missing or zero values are returned as they are.
        """
        area = row['Area']
        built_up_area = row['built_up_area']
        # Zero is returned as-is so the ratio below never divides by zero
        if pd.isna(area) or pd.isna(built_up_area) or built_up_area == 0:
            return built_up_area
        ratio = round(area / built_up_area)
        if ratio == 9.0:
            return built_up_area * self.SQYD_TO_SQFT
        if ratio == 11.0:
            # Same sq.m. factor as convert_to_sqft (previously a rounded 10.7)
            return built_up_area * self.SQM_TO_SQFT
        return built_up_area

A careful analysis shows that the rows where `Property_Type` is Independent_House have missing values in all the newly created features. The reason is that independent houses do not have a carpet area, built up area or super built up area; instead they have a plot area. So we need to extract the plot area from the `areaWithType` feature and convert it to sqft, using the `extract_plot_area` and `convert_scale` methods of the class above.

In [None]:
# Instantiating the class
clean_area = Extract_featres_Area()

# (new column, extractor applied to the raw 'areaWithType' text), in creation order
area_extractors = [
    ('super_built_up_area', clean_area.get_super_built_up_area),
    ('built_up_area', lambda text: clean_area.get_area(text, 'Built Up area')),
    ('carpet_area', lambda text: clean_area.get_area(text, 'Carpet area')),
]

for area_column, extractor in area_extractors:
    # Step 1: pull the number that follows the area label
    df[area_column] = df['areaWithType'].apply(extractor)
    # Step 2: convert to sqft if needed
    df[area_column] = df.apply(
        lambda row, column=area_column: clean_area.convert_to_sqft(row['areaWithType'], row[column]),
        axis=1,
    )

In [None]:
# Rows for which none of the three flat-style areas could be extracted
area_columns = ['super_built_up_area', 'built_up_area', 'carpet_area']
no_area_mask = df[area_columns].isnull().all(axis=1)

# .loc + .copy() yields an independent frame, so the column assignments below
# do not go through chained indexing on a slice of df
Ind_House_df = df.loc[no_area_mask, ['price', 'Property_Type', 'Area', 'areaWithType'] + area_columns].copy()

# Extracting Plot area ( Plot area of houses ~ Built up area of flats )
Ind_House_df['built_up_area'] = Ind_House_df['areaWithType'].apply(clean_area.extract_plot_area)

# Conversion of values to sqft
Ind_House_df['built_up_area'] = Ind_House_df.apply(clean_area.convert_scale,axis=1)

# Updating the original dataframe (df.update aligns on index and only writes non-null values)
df.update(Ind_House_df)

In the `additionalRoom` feature a lot of rows mention a study room, servant room, store room or pooja room, so we will create 5 binary features (study room, servant room, store room, pooja room and others), where 1 means that type of additional room is present.

In [None]:
# Peek at 6 random entries of the additionalRoom frequency table
df['additionalRoom'].value_counts().sample(6)

In [None]:
# List of new columns to be created
new_cols = ['study room', 'servant room', 'store room', 'pooja room', 'others']

# Populate the new columns based on the "additionalRoom" column.
# regex=False: the labels are matched as plain substrings.
# na=False: a missing additionalRoom counts as "not present" (0) instead of
# producing NaN, which astype(int) cannot convert.
for col in new_cols:
    df[col] = df['additionalRoom'].str.contains(col, regex=False, na=False).astype(int)

In [None]:
# Listing the distinct raw values of agePossession
df['agePossession'].unique()

The `agePossession` feature has high cardinality, so we will replace its values with a small set of categories in such a way that the overall information is preserved.

In [None]:
def categorize_age_possession(value):
    """Map a raw agePossession value onto one of six coarse categories.

    Returns one of "New Property", "Relatively New", "Moderately Old",
    "Old Property", "Under Construction" or "Undefined". Missing and
    non-string values are reported as "Undefined".
    """
    if not isinstance(value, str):
        return "Undefined"
    if "0 to 1 Year Old" in value or "Within 6 months" in value or "Within 3 months" in value:
        return "New Property"
    if "1 to 5 Year Old" in value:
        return "Relatively New"
    if "5 to 10 Year Old" in value:
        return "Moderately Old"
    if "10+ Year Old" in value:
        return "Old Property"
    if "Under Construction" in value or "By" in value:
        return "Under Construction"
    try:
        # For entries like 'May 2024': the last word is a year
        int(value.split(" ")[-1])
    except ValueError:
        # Only a non-numeric last word is expected here; anything else should surface
        return "Undefined"
    return "Under Construction"
    
# Replace every raw agePossession value with its category
df['agePossession'] = df['agePossession'].map(categorize_age_possession)

In [None]:
# Collect every furnishing entry listed in the furnishDetails column:
# strip the list punctuation ([, ] and ') from each value, then split on ', '
list_punctuation = str.maketrans('', '', "[]'")
all_furnishings = [
    furnishing_entry
    for detail in df['furnishDetails'].dropna()
    for furnishing_entry in detail.translate(list_punctuation).split(', ')
]

# Distinct entries only
unique_furnishings = list(set(all_furnishings))

In [None]:
# Defining a function to extract the count of a furnishing from the furnishDetails
def get_furnishing_count(details, furnishing):
    """Return how many units of furnishing are listed in details.

    details    : raw furnishDetails text; anything that is not a string counts as 0.
    furnishing : furnishing name, matched literally.

    Returns 0 when 'No <furnishing>' is present, the leading number when
    '<number> <furnishing>' is present, 1 when the name occurs without a
    number, and 0 otherwise.

    NOTE(review): the name is matched as a substring, so a short name can also
    be found inside a longer furnishing name - confirm this is acceptable.
    """
    if not isinstance(details, str):
        return 0
    if f"No {furnishing}" in details:
        return 0
    # Raw string for \d, and re.escape so characters such as '(' in a
    # furnishing name are matched literally instead of being read as regex syntax
    match = re.search(rf"(\d+) {re.escape(furnishing)}", details)
    if match:
        return int(match.group(1))
    if furnishing in details:
        return 1
    return 0

In [None]:
# Simplify the furnishings list by removing the "No " prefix and numbers, dropping
# empty names, and keeping each name once. Sorting makes the column order the same
# on every run (the iteration order of a set of strings can change between kernels).
columns_to_include = sorted(
    {re.sub(r'No |\d+', '', furnishing).strip() for furnishing in unique_furnishings} - {''}
)

# Create new columns for each unique furnishing and populate with counts
for furnishing in columns_to_include:
    df[furnishing] = df['furnishDetails'].apply(get_furnishing_count, args=(furnishing,))

# Create the new dataframe with the required columns
# (.copy() makes it independent of df, so it can be modified safely later)
furnishings_df = df[['furnishDetails'] + columns_to_include].copy()

In [None]:
# Let's see how the dataframe looks like
# (only the last expression of a cell is rendered, so this preview shows nothing here)
furnishings_df.head(4)

# Dropping the first feature so that we could create clusters.
# Reassigning instead of inplace=True avoids modifying, in place, a frame that was
# selected from df (which may raise SettingWithCopyWarning).
furnishings_df = furnishings_df.drop(columns=['furnishDetails'])

In [None]:
# Standardise every furnishing count column so all of them share a common scale
scaler = StandardScaler()
scaler.fit(furnishings_df)
scaled_data = scaler.transform(furnishings_df)

In [None]:
# Silhouette score of a KMeans fit for every candidate cluster count from 2 to 10
candidate_cluster_counts = range(2, 11)
silhouette_sc = []
for cluster_count in candidate_cluster_counts:
    kmeans = KMeans(n_clusters=cluster_count, random_state=0, n_init=10, init='k-means++')
    kmeans.fit(scaled_data)
    labels = kmeans.predict(scaled_data)
    silhouette_sc.append(silhouette_score(scaled_data, labels))

# Score against cluster count
fig, ax = plt.subplots(figsize=(5,3))
ax.plot(candidate_cluster_counts, silhouette_sc)
ax.set_title('Silhouette graph')
ax.set_xlabel('Number of Clusters')
ax.set_ylabel('Silhouette Score')
plt.show()

In [None]:
# Number of furnishing clusters to build
n_clusters = 3

# Fit the KMeans model, then predict the cluster assignment of each row
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
cluster_assignments = kmeans.fit(scaled_data).predict(scaled_data)

In [None]:
# Removing the furnishing count columns by name. The previous positional slice
# (iloc[:, :-18]) silently removed the wrong columns whenever the number of
# furnishing columns was not exactly 18.
df = df.drop(columns=columns_to_include)

# Creating a new feature from the cluster labels (positional assignment, one label per row).
# Intended reading: 0 -> unfurnished, 1 -> semifurnished and 2 -> furnished.
# NOTE(review): KMeans label numbers are arbitrary; confirm this mapping against the
# cluster contents before relying on it.
df['furnishing_type'] = cluster_assignments

In [None]:
# Previewing the first 4 rows of the apartments dataframe
Appartment_df.head(4)

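What is happening with the apartment dataframe: it lists the top facilities of each property, so for the rows whose `features` value is missing we look up the row's `society` in the apartment dataframe and fill `features` with that property's `TopFacilities`.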

In [None]:
# Lowering the property name (presumably so it matches the casing of df['society'] - confirm)
Appartment_df['PropertyName'] = Appartment_df['PropertyName'].str.lower()

# Accessing all the rows having null feature values
Null_feature_df = df[df['features'].isnull()]

# Lookup table: one TopFacilities entry per property name (first occurrence wins).
# A left merge would return extra rows for a repeated PropertyName and break the
# positional write-back below; a map always returns exactly one value per row.
facilities_by_society = (
    Appartment_df
    .dropna(subset=['PropertyName'])
    .drop_duplicates(subset='PropertyName')
    .set_index('PropertyName')['TopFacilities']
)

# Facilities for every row with missing features (NaN when the society is unknown)
x = Null_feature_df['society'].map(facilities_by_society)

# Filling missing values in feature column with new values
df.loc[Null_feature_df.index,'features'] = x.values

In [None]:
# Convert the string representation of lists in the 'features' column to actual lists
# (anything that is not a string starting with '[' becomes an empty list)
df['features_list'] = df['features'].apply(
    lambda x: ast.literal_eval(x) if isinstance(x, str) and x.startswith('[') else []
)

# Use MultiLabelBinarizer to convert the features list into a binary matrix
mlb = MultiLabelBinarizer()
features_binary_matrix = mlb.fit_transform(df['features_list'])

# Convert the binary matrix into a DataFrame.
# index=df.index keeps the rows aligned with df: df's index has gaps after the earlier
# row filtering, so a fresh 0..n-1 index would mis-align any later index-based
# assignment back into df.
features_binary_df = pd.DataFrame(features_binary_matrix, columns=mlb.classes_, index=df.index)

In [None]:
# Silhouette score of a KMeans fit on the binary feature matrix for 2 to 10 clusters
candidate_cluster_counts = range(2, 11)
silhouette_sc = []
for cluster_count in candidate_cluster_counts:
    kmeans = KMeans(n_clusters=cluster_count, random_state=0, n_init=10, init='k-means++')
    kmeans.fit(features_binary_df)
    labels = kmeans.predict(features_binary_df)
    silhouette_sc.append(silhouette_score(features_binary_df, labels))

# Score against cluster count
fig, ax = plt.subplots(figsize=(5,3))
ax.plot(candidate_cluster_counts, silhouette_sc)
ax.set_title('Silhouette graph')
ax.set_xlabel('Number of Clusters')
ax.set_ylabel('Silhouette Score')
plt.show()

In [None]:
# Assigning weights based on perceived luxury contribution.
# Hand-assigned weight per amenity (the values used here range from 4 to 10);
# a higher weight means the amenity contributes more to the luxury score.
# The keys are used to select columns of features_binary_df, so they must match
# the amenity labels found in the 'features' lists.
weights = {
    '24/7 Power Backup': 8,
    '24/7 Water Supply': 4,
    '24x7 Security': 7,
    'ATM': 4,
    'Aerobics Centre': 6,
    'Airy Rooms': 8,
    'Amphitheatre': 7,
    'Badminton Court': 7,
    'Banquet Hall': 8,
    'Bar/Chill-Out Lounge': 9,
    'Barbecue': 7,
    'Basketball Court': 7,
    'Billiards': 7,
    'Bowling Alley': 8,
    'Business Lounge': 9,
    'CCTV Camera Security': 8,
    'Cafeteria': 6,
    'Car Parking': 6,
    'Card Room': 6,
    'Centrally Air Conditioned': 9,
    'Changing Area': 6,
    "Children's Play Area": 7,
    'Cigar Lounge': 9,
    'Clinic': 5,
    'Club House': 9,
    'Concierge Service': 9,
    'Conference room': 8,
    'Creche/Day care': 7,
    'Cricket Pitch': 7,
    'Doctor on Call': 6,
    'Earthquake Resistant': 5,
    'Entrance Lobby': 7,
    'False Ceiling Lighting': 6,
    'Feng Shui / Vaastu Compliant': 5,
    'Fire Fighting Systems': 8,
    'Fitness Centre / GYM': 8,
    'Flower Garden': 7,
    'Food Court': 6,
    'Foosball': 5,
    'Football': 7,
    'Fountain': 7,
    'Gated Community': 7,
    'Golf Course': 10,
    'Grocery Shop': 6,
    'Gymnasium': 8,
    'High Ceiling Height': 8,
    'High Speed Elevators': 8,
    'Infinity Pool': 9,
    'Intercom Facility': 7,
    'Internal Street Lights': 6,
    'Internet/wi-fi connectivity': 7,
    'Jacuzzi': 9,
    'Jogging Track': 7,
    'Landscape Garden': 8,
    'Laundry': 6,
    'Lawn Tennis Court': 8,
    'Library': 8,
    'Lounge': 8,
    'Low Density Society': 7,
    'Maintenance Staff': 6,
    'Manicured Garden': 7,
    'Medical Centre': 5,
    'Milk Booth': 4,
    'Mini Theatre': 9,
    'Multipurpose Court': 7,
    'Multipurpose Hall': 7,
    'Natural Light': 8,
    'Natural Pond': 7,
    'Park': 8,
    'Party Lawn': 8,
    'Piped Gas': 7,
    'Pool Table': 7,
    'Power Back up Lift': 8,
    'Private Garden / Terrace': 9,
    'Property Staff': 7,
    'RO System': 7,
    'Rain Water Harvesting': 7,
    'Reading Lounge': 8,
    'Restaurant': 8,
    'Salon': 8,
    'Sauna': 9,
    'Security / Fire Alarm': 9,
    'Security Personnel': 9,
    'Separate entry for servant room': 8,
    'Sewage Treatment Plant': 6,
    'Shopping Centre': 7,
    'Skating Rink': 7,
    'Solar Lighting': 6,
    'Solar Water Heating': 7,
    'Spa': 9,
    'Spacious Interiors': 9,
    'Squash Court': 8,
    'Steam Room': 9,
    'Sun Deck': 8,
    'Swimming Pool': 8,
    'Temple': 5,
    'Theatre': 9,
    'Toddler Pool': 7,
    'Valet Parking': 9,
    'Video Door Security': 9,
    'Visitor Parking': 7,
    'Water Softener Plant': 7,
    'Water Storage': 7,
    'Water purifier': 7,
    'Yoga/Meditation Area': 7
}
# Calculate luxury score for each row: the weighted sum of the amenities it has.
# reindex(..., fill_value=0) treats an amenity that never occurs in the data as
# absent instead of raising a KeyError.
weight_by_feature = pd.Series(weights)
luxury_score = (
    features_binary_df
    .reindex(columns=weight_by_feature.index, fill_value=0)
    .multiply(weight_by_feature, axis=1)
    .sum(axis=1)
)

# Creating a new feature.
# features_binary_df was built from df['features_list'] row by row, so its rows are in
# df's order; assigning positionally avoids index alignment, which mis-assigns scores
# (and introduces NaN) whenever the two indexes differ.
df['luxury_score'] = luxury_score.to_numpy()

In [None]:
# Raw / intermediate columns that are no longer needed once the features are engineered
unnecessary_columns = [
    'nearbyLocations',
    'furnishDetails',
    'features',
    'features_list',
    'additionalRoom',
    'Area',
    'areaWithType',
]
df.drop(columns=unnecessary_columns, inplace=True)

In [36]:
# Previewing the first 3 rows of the final dataframe
df.head(3)

Unnamed: 0,property_name,Property_Type,society,sector,price,Price_Per_SQFT,Area,areaWithType,bedRoom,bathroom,balcony,address,floorNum,facing,agePossession,description,rating,super_built_up_area,built_up_area,carpet_area,study room,servant room,store room,pooja room,others,furnishing_type,luxury_score
0,2 BHK Flat in Krishna Colony,flat,maa bhagwati residency,sector 7,0.45,5000.0,900,Carpet area: 900 (83.61 sq.m.),2.0,2.0,1,"Krishna Colony, Gurgaon, Haryana",4,West,Relatively New,So with lift.Maa bhagwati residency is one of ...,"['Environment4 out of 5', 'Safety4 out of 5', ...",,,900.0,0,0,0,0,0,2,28.0
1,2 BHK Flat in Ashok Vihar,flat,apna enclave,sector 3,0.5,7692.0,650,Carpet area: 650 (60.39 sq.m.),2.0,2.0,1,"46b, Ashok Vihar, Gurgaon, Haryana",1,West,Old Property,"Property situated on main road, railway statio...","['Environment4 out of 5', 'Safety4 out of 5', ...",,,650.0,0,0,0,0,0,0,37.0
2,2 BHK Flat in Sohna,flat,tulsiani easy in homes,sector 1,0.4,6722.0,595,Carpet area: 595 (55.28 sq.m.),2.0,2.0,3,"Sohna, Gurgaon, Haryana",12,,New Property,"This property is 15 km away from badshapur, gu...","['Environment4 out of 5', 'Safety4 out of 5', ...",,,595.0,0,0,0,0,0,2,36.0


In [37]:
# Saving current state of data in the csv file
# (index=False keeps the dataframe index out of the file)
df.to_csv('Cleaned_datasets/Combined_CleanData_V2.csv',index=False)