In [13]:
# Import dependencies
import requests
import pprint
import pandas as pd
import json
import os
import csv
from pymongo import MongoClient
from pprint import pprint

## Property Listings Data

In [14]:
# Open a connection to the MongoDB server on localhost (port 27017)
client = MongoClient('mongodb://localhost:27017/')

# Select the project database
db = client.get_database('real_estate_price_prediction_project_db')

# Select the collection that holds the property listings
collection = db.get_collection('property_listings_price_prediction')



## Milwaukee Property Listings with Predicted Price


In [15]:
# Allow up to 200 characters per column when pandas renders a dataframe
pd.set_option("display.max_colwidth", 200)

# Load the listings CSV into a pandas dataframe and preview the first rows
properties_listings = pd.read_csv(
    filepath_or_buffer="Resources/final-listings-dataset-with-predictions.csv"
)
properties_listings.head()

Unnamed: 0.1,Unnamed: 0,district,nbhd,style,extwall,stories,rooms,finishedsqft,lotsize,units,...,state,city,address,zipcode,latitude,longitude,marketestimate,price,2024 predictions,Price difference
0,0,7,1440,milwaukee bungalow,aluminum/vinyl,1.0,7,1400,3484.8,1,...,WI,Milwaukee,South 8th STREET,53215,42.996044,-87.921,156700.0,155000,112906.0,-42094.0
1,1,12,4120,duplex-cottage,wood,2.0,10,1600,3484.8,2,...,WI,Milwaukee,East Homer STREET,53207,42.99976,-87.89994,370200.0,349900,116829.25,-233070.75
2,2,10,2710,cottage,aluminum/vinyl,1.0,4,667,4356.0,1,...,WI,Milwaukee,North 35th STREET,53216,43.095448,-87.957344,139700.0,137500,137188.0,-312.0
3,3,15,2400,colonial,aluminum/vinyl,2.0,6,1184,4356.0,1,...,WI,Milwaukee,North 25th STREET,53205,43.056957,-87.944756,132800.0,135000,125095.0,-9905.0
4,4,13,4520,colonial,aluminum/vinyl,2.0,6,1300,4356.0,1,...,WI,Milwaukee,South Whitnall AVENUE,53207,42.982292,-87.905945,360000.0,339000,272777.0,-66223.0


In [16]:
# Drop every leftover "Unnamed: ..." column (presumably row indexes written by
# earlier CSV exports - confirm against the source file).
# Selecting the labels by prefix removes both "Unnamed: 0" and "Unnamed: 0.1",
# and keeps the cell safe to re-run: once the columns are gone the list is empty
# and drop() is a no-op instead of raising KeyError.
unnamed_columns = [
    column for column in properties_listings.columns
    if str(column).startswith("Unnamed")
]
properties_listings = properties_listings.drop(columns=unnamed_columns)
properties_listings.head()

Unnamed: 0,district,nbhd,style,extwall,stories,rooms,finishedsqft,lotsize,units,bdrms,...,state,city,address,zipcode,latitude,longitude,marketestimate,price,2024 predictions,Price difference
0,7,1440,milwaukee bungalow,aluminum/vinyl,1.0,7,1400,3484.8,1,3,...,WI,Milwaukee,South 8th STREET,53215,42.996044,-87.921,156700.0,155000,112906.0,-42094.0
1,12,4120,duplex-cottage,wood,2.0,10,1600,3484.8,2,3,...,WI,Milwaukee,East Homer STREET,53207,42.99976,-87.89994,370200.0,349900,116829.25,-233070.75
2,10,2710,cottage,aluminum/vinyl,1.0,4,667,4356.0,1,2,...,WI,Milwaukee,North 35th STREET,53216,43.095448,-87.957344,139700.0,137500,137188.0,-312.0
3,15,2400,colonial,aluminum/vinyl,2.0,6,1184,4356.0,1,3,...,WI,Milwaukee,North 25th STREET,53205,43.056957,-87.944756,132800.0,135000,125095.0,-9905.0
4,13,4520,colonial,aluminum/vinyl,2.0,6,1300,4356.0,1,3,...,WI,Milwaukee,South Whitnall AVENUE,53207,42.982292,-87.905945,360000.0,339000,272777.0,-66223.0


In [17]:

# Confirm the columns have been cleaned and review which ones will be exported to MongoDB
properties_listings.dtypes


district              int64
nbhd                  int64
style                object
extwall              object
stories             float64
rooms                 int64
finishedsqft          int64
lotsize             float64
units                 int64
bdrms                 int64
baths_total           int64
state                object
city                 object
address              object
zipcode               int64
latitude            float64
longitude           float64
marketestimate      float64
price                 int64
2024 predictions    float64
Price difference    float64
dtype: object

In [18]:
# Keep a handle on the listings collection under a descriptive name
properties_price_prediction = db.get_collection('property_listings_price_prediction')

In [19]:
# Convert the dataframe to a list of record dictionaries (one per listing).
# reset_index() returns a new frame, so properties_listings itself is not
# modified and this cell can be re-run; the row position is still stored in
# each document under the 'index' key.
data_dict = properties_listings.reset_index().to_dict("records")

# Empty the collection before loading so that re-running the notebook does not
# store every listing a second time.
properties_price_prediction.delete_many({})

# Insert all entries into the collection
properties_price_prediction.insert_many(data_dict)

InsertManyResult([ObjectId('666a304d9bbdad740fff962b'), ObjectId('666a304d9bbdad740fff962c'), ObjectId('666a304d9bbdad740fff962d'), ObjectId('666a304d9bbdad740fff962e'), ObjectId('666a304d9bbdad740fff962f'), ObjectId('666a304d9bbdad740fff9630'), ObjectId('666a304d9bbdad740fff9631'), ObjectId('666a304d9bbdad740fff9632'), ObjectId('666a304d9bbdad740fff9633'), ObjectId('666a304d9bbdad740fff9634'), ObjectId('666a304d9bbdad740fff9635'), ObjectId('666a304d9bbdad740fff9636'), ObjectId('666a304d9bbdad740fff9637'), ObjectId('666a304d9bbdad740fff9638'), ObjectId('666a304d9bbdad740fff9639'), ObjectId('666a304d9bbdad740fff963a'), ObjectId('666a304d9bbdad740fff963b'), ObjectId('666a304d9bbdad740fff963c'), ObjectId('666a304d9bbdad740fff963d'), ObjectId('666a304d9bbdad740fff963e'), ObjectId('666a304d9bbdad740fff963f'), ObjectId('666a304d9bbdad740fff9640'), ObjectId('666a304d9bbdad740fff9641'), ObjectId('666a304d9bbdad740fff9642'), ObjectId('666a304d9bbdad740fff9643')], acknowledged=True)

In [20]:
# Show the name of every collection currently in the database
collection_names = db.list_collection_names()
print(collection_names)

['property_listings_price_prediction']


In [21]:
# Fetch a single document from the listings collection and print it for review
sample_document = db['property_listings_price_prediction'].find_one()
print(sample_document)

{'_id': ObjectId('666a304d9bbdad740fff962b'), 'index': 0, 'district': 7, 'nbhd': 1440, 'style': 'milwaukee bungalow', 'extwall': 'aluminum/vinyl', 'stories': 1.0, 'rooms': 7, 'finishedsqft': 1400, 'lotsize': 3484.8, 'units': 1, 'bdrms': 3, 'baths_total': 1, 'state': 'WI', 'city': 'Milwaukee', 'address': 'South 8th STREET', 'zipcode': 53215, 'latitude': 42.996044, 'longitude': -87.921, 'marketestimate': 156700.0, 'price': 155000, '2024 predictions': 112906.0, 'Price difference': -42094.0}


In [22]:
# List out the collections in the database one last time, reusing the client
# that is already open. Opening a second MongoClient here would leave that new
# connection unclosed, because only `client` is closed below.
try:
    print(db.list_collection_names())
finally:
    # Closing the connection to MongoDB, even if the listing above fails
    client.close()

['property_listings_price_prediction']
