In [23]:
# Import dependencies
import requests
import pprint
import pandas as pd
import json
import os
import csv
from pymongo import MongoClient
from pprint import pprint

## Property Listings Data

In [24]:
# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')

# Create the Database in Mongo Db
db = client['real_estate_price_prediction_project_db']  

# Create the collection in MongoDB
collection = db['property_listings_price_prediction'] 



## Milwaukee Property Listings with Predicted Price


In [25]:
# Set the column width
pd.options.display.max_colwidth = 200

# Read the dataset and store as pandas dataframe. 
properties_listings = pd.read_csv("Resources/revised-final-listings-dataset-with-predictions.csv")
properties_listings.head()

Unnamed: 0.1,Unnamed: 0,district,nbhd,style,extwall,stories,year_built,rooms,finishedsqft,lotsize,...,state,city,address,zipcode,latitude,longitude,marketestimate,price,2025 predictions,Price difference
0,0,7,1680,ranch,aluminum/vinyl,1.0,1947,4,667,4356.0,...,WI,Milwaukee,North 35th STREET,53216,43.095448,-87.957344,139700.0,137500,184857.0,47357.0
1,1,5,2080,cape cod,aluminum/vinyl,1.0,1950,4,953,4791.6,...,WI,Milwaukee,North 78th STREET,53222,43.0746,-88.009514,192900.0,179000,197003.0,18003.0
2,2,13,4720,colonial,aluminum/vinyl,2.0,1932,5,1152,8712.0,...,WI,Milwaukee,West Bottsford AVENUE,53221,42.964855,-87.95444,259000.0,249000,253020.1,4020.1
3,3,9,240,ranch,aluminum / vinyl,1.0,1992,0,1156,7840.8,...,WI,Milwaukee,West Eden PLACE,53220,42.98033,-88.00272,157300.0,144900,162058.0,17158.0
4,4,15,3000,colonial,aluminum/vinyl,2.0,2002,6,1184,4356.0,...,WI,Milwaukee,North 25th STREET,53205,43.056957,-87.944756,132800.0,135000,138645.0,3645.0


In [26]:
#dropping the unknown column 
properties_listings= properties_listings.drop(columns=['Unnamed: 0'])
properties_listings.head()

Unnamed: 0,district,nbhd,style,extwall,stories,year_built,rooms,finishedsqft,lotsize,units,...,state,city,address,zipcode,latitude,longitude,marketestimate,price,2025 predictions,Price difference
0,7,1680,ranch,aluminum/vinyl,1.0,1947,4,667,4356.0,1,...,WI,Milwaukee,North 35th STREET,53216,43.095448,-87.957344,139700.0,137500,184857.0,47357.0
1,5,2080,cape cod,aluminum/vinyl,1.0,1950,4,953,4791.6,1,...,WI,Milwaukee,North 78th STREET,53222,43.0746,-88.009514,192900.0,179000,197003.0,18003.0
2,13,4720,colonial,aluminum/vinyl,2.0,1932,5,1152,8712.0,1,...,WI,Milwaukee,West Bottsford AVENUE,53221,42.964855,-87.95444,259000.0,249000,253020.1,4020.1
3,9,240,ranch,aluminum / vinyl,1.0,1992,0,1156,7840.8,1,...,WI,Milwaukee,West Eden PLACE,53220,42.98033,-88.00272,157300.0,144900,162058.0,17158.0
4,15,3000,colonial,aluminum/vinyl,2.0,2002,6,1184,4356.0,1,...,WI,Milwaukee,North 25th STREET,53205,43.056957,-87.944756,132800.0,135000,138645.0,3645.0


In [27]:

# Confrm columns have been cleaned and identify the necessary ones only for export to mongodb
properties_listings.dtypes


district              int64
nbhd                  int64
style                object
extwall              object
stories             float64
year_built            int64
rooms                 int64
finishedsqft          int64
lotsize             float64
units                 int64
bdrms                 int64
baths_total           int64
state                object
city                 object
address              object
zipcode               int64
latitude            float64
longitude           float64
marketestimate      float64
price                 int64
2025 predictions    float64
Price difference    float64
dtype: object

In [28]:
# Assign the collection to a variable
properties_price_prediction = db['property_listings_price_prediction']

In [29]:
# Convert the dataframe to dictionary values and insert all entries into the specified collection
properties_listings.reset_index(inplace=True)
data_dict = properties_listings.to_dict("records")
properties_price_prediction.insert_many(data_dict)

InsertManyResult([ObjectId('666a44560e9bbf2f7969b1b3'), ObjectId('666a44560e9bbf2f7969b1b4'), ObjectId('666a44560e9bbf2f7969b1b5'), ObjectId('666a44560e9bbf2f7969b1b6'), ObjectId('666a44560e9bbf2f7969b1b7'), ObjectId('666a44560e9bbf2f7969b1b8'), ObjectId('666a44560e9bbf2f7969b1b9'), ObjectId('666a44560e9bbf2f7969b1ba'), ObjectId('666a44560e9bbf2f7969b1bb'), ObjectId('666a44560e9bbf2f7969b1bc'), ObjectId('666a44560e9bbf2f7969b1bd'), ObjectId('666a44560e9bbf2f7969b1be'), ObjectId('666a44560e9bbf2f7969b1bf'), ObjectId('666a44560e9bbf2f7969b1c0'), ObjectId('666a44560e9bbf2f7969b1c1'), ObjectId('666a44560e9bbf2f7969b1c2'), ObjectId('666a44560e9bbf2f7969b1c3'), ObjectId('666a44560e9bbf2f7969b1c4'), ObjectId('666a44560e9bbf2f7969b1c5'), ObjectId('666a44560e9bbf2f7969b1c6'), ObjectId('666a44560e9bbf2f7969b1c7'), ObjectId('666a44560e9bbf2f7969b1c8'), ObjectId('666a44560e9bbf2f7969b1c9')], acknowledged=True)

In [30]:
# List out the collections in the database
print(db.list_collection_names())

['property_listings_price_prediction']


In [31]:
# review a document in the collection
print(db.property_listings_price_prediction.find_one())

{'_id': ObjectId('666a44560e9bbf2f7969b1b3'), 'index': 0, 'district': 7, 'nbhd': 1680, 'style': 'ranch', 'extwall': 'aluminum/vinyl', 'stories': 1.0, 'year_built': 1947, 'rooms': 4, 'finishedsqft': 667, 'lotsize': 4356.0, 'units': 1, 'bdrms': 2, 'baths_total': 1, 'state': 'WI', 'city': 'Milwaukee', 'address': 'North 35th STREET', 'zipcode': 53216, 'latitude': 43.095448, 'longitude': -87.957344, 'marketestimate': 139700.0, 'price': 137500, '2025 predictions': 184857.0, 'Price difference': 47357.0}


In [32]:
# List out the collections in the database
mongo = MongoClient(port=27017)
db = mongo['real_estate_price_prediction_project_db']  
print(db.list_collection_names())

# Closing the connection to MongoDB
client.close()

['property_listings_price_prediction']
