# Parma Crescent Pricing Model (data preparation)

In [1]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
import time

### Define function to call rightmove for prices

This function returns a data frame with 1 row per transaction (sale).

In [2]:
def get_properties(postcode, page=1, timeout=30):
    """Fetch one page of Rightmove sold-price results for a postcode.

    Parameters
    ----------
    postcode : str
        Postcode such as ``'SW11 1LT'``. Spaces are replaced with hyphens and
        the result is lower-cased to build the request URL.
    page : int, default 1
        Results page to request.
    timeout : float, default 30
        Seconds passed to ``requests.get`` so that a stalled connection
        cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per transaction (sale): the property-level fields joined with
        the flattened fields of each entry in that property's
        ``transactions`` list. An empty frame is returned when the response
        status is not 200, or when the page lists no properties or no
        transactions.

    Raises
    ------
    ValueError
        If the second-to-last ``<script>`` tag is missing or holds no ``{``,
        i.e. the embedded JSON is not where this function expects it.
        JSON decoding and key-lookup errors propagate unchanged.
    """

    # attempt to pull the prices
    slug = '-'.join(postcode.split(' ')).lower()
    url = f'https://www.rightmove.co.uk/house-prices/{slug}.html?page={page}'
    res = requests.get(url, timeout=timeout)
    print(f'{url} returned {res.status_code}')
    if res.status_code != 200:
        return pd.DataFrame()

    # attempt to find some transactions: the payload is read from the
    # second-to-last <script> tag, skipping everything before the first '{'
    soup = BeautifulSoup(res.text, 'html.parser')
    scripts = soup.find_all('script')
    payload = scripts[-2].string if len(scripts) >= 2 else None
    start = payload.find('{') if payload else -1
    if start < 0:
        raise ValueError(f'no embedded results JSON found in {url}')
    d = json.loads(payload[start:])
    df = pd.DataFrame(d['results']['properties'])
    if df.empty or 'transactions' not in df.columns:
        return pd.DataFrame()

    # melt to make 1 row per transaction
    df = df.drop('transactions', axis=1).join(df['transactions'].explode())
    # a property with an empty transactions list explodes to NaN, which
    # json_normalize cannot flatten; such rows carry no sale, so drop them
    df = df.dropna(subset=['transactions']).reset_index(drop=True)
    if df.empty:
        return pd.DataFrame()
    # normalising a plain list yields a 0..n-1 index matching the reset index
    flat = pd.json_normalize(df['transactions'].tolist())
    return df.drop('transactions', axis=1).join(flat)

### Iterate over postcodes of interest and compile results in a single frame

In [3]:
# one frame per successfully fetched results page
dflist = []

# define list of postcodes within area
for postcode in ['SW11 1LT', 'SW11 1LU']:
    page = 1
    # pause before every request so the site is hit at most once per second
    time.sleep(1)
    df = get_properties(postcode, page)
    # an empty frame marks the end of this postcode's pages
    while not df.empty:
        dflist.append(df)
        page += 1
        time.sleep(1)
        df = get_properties(postcode, page)

https://www.rightmove.co.uk/house-prices/sw11-1lt.html?page=1 returned 200
https://www.rightmove.co.uk/house-prices/sw11-1lt.html?page=2 returned 200
https://www.rightmove.co.uk/house-prices/sw11-1lt.html?page=3 returned 200
https://www.rightmove.co.uk/house-prices/sw11-1lu.html?page=1 returned 200
https://www.rightmove.co.uk/house-prices/sw11-1lu.html?page=2 returned 200


In [4]:
# stack every fetched page into one frame with a fresh 0..n-1 index
df = pd.concat(dflist, ignore_index=True)

In [5]:
# strip the currency symbol and thousands separators, then parse as a number
df['numericPrice'] = pd.to_numeric(
    df['displayPrice']
    .str.replace('£', '', regex=False)
    .str.replace(',', '', regex=False)
)

### Expand address field into categories

In [6]:
# one row per ', '-separated address segment; each row keeps the index label
# of the df row it came from, so later steps can group by index level 0
df_address = df['address'].str.split(', ').explode().to_frame(name='address')

In [7]:
# extract the postcode into a new field and remove it from the original one
postcode_regex = r'[A-Z]{1,2}[0-9]{1,2}[ ]{0,1}[0-9]{1,2}[A-Z]{1,2}$'
df_address['postcode'] = df_address['address'].str.extract(f'({postcode_regex})')
# segments without a match hold NaN (a float) in 'postcode' and are kept as-is
df_address['address'] = [
    segment if isinstance(code, float) else segment.replace(code, '')
    for segment, code in zip(df_address['address'], df_address['postcode'])
]

In [8]:
# extract the street number into a new field
# first pass: drop any trailing non-digit characters and try to parse what is
# left as a number (segments that still fail to parse become NaN)
df_address['number'] = pd.to_numeric(
    df_address['address'].str.replace(r'[^0-9]*$', '', regex=True),
    errors='coerce',
)
# second pass: keep the original segment text where a positive number was found
df_address['number'] = [
    segment if parsed > 0 else None
    for segment, parsed in zip(df_address['address'], df_address['number'])
]

In [9]:
# identify any prefixes or house names and extract those
# a segment is a prefix while no numbered segment has yet been seen within
# the same source address (running count of numbered segments is still 0)
df_address['prefix'] = df_address['number'].notna().groupby(level=0).cumsum().eq(0)
df_address['prefix'] = [
    segment if is_prefix else None
    for segment, is_prefix in zip(df_address['address'], df_address['prefix'])
]

In [10]:
# a segment already captured as postcode, number or prefix has more than one
# non-null cell in its row; blank its 'address' so only unclassified segments remain
df_address.loc[df_address.count(axis=1).gt(1), 'address'] = None

In [11]:
# number the remaining fields address_1, address_2 etc.
# lookup list of the distinct non-null segments in first-seen order; None sits
# at position 0 so that blanked segments map to 0
address_to_idx = [None] + df_address.address.dropna().drop_duplicates().tolist()
# position of each segment in the lookup list, with 0 (blanked) turned back into a null
df_address['counter'] = df_address.address.apply(lambda x: address_to_idx.index(x)).replace({0: None})
# rank the positions within each source address (index level 0) to get 1, 2, ...;
# the order therefore follows the global first-seen order of the segment text
# convert_dtypes presumably yields a nullable integer column, so missing
# counters stringify as '<NA>' downstream -- TODO confirm
df_address['counter'] = df_address.counter.groupby(level=0).rank(method='first').convert_dtypes()

In [12]:
# melt and repivot to get back to original shape (one row per source address)
df_address_melt = df_address.reset_index().melt(id_vars=['index', 'counter']).dropna(subset='value')
# column label: '<variable>_<counter>' for numbered segments, the bare
# variable name when the counter is missing
df_address_melt['column'] = (df_address_melt.variable + '_' + df_address_melt.counter.astype(str)).str.replace('_<NA>', '')
# one address can put several segments into the same column (e.g. two prefix
# segments before the house number); pivot() raises ValueError on such
# duplicates, so aggregate them by joining in their original order instead.
# With a single value per cell the result is that value unchanged.
df_address = df_address_melt.pivot_table(
    index='index',
    columns='column',
    values='value',
    aggfunc=lambda parts: ', '.join(parts),
)

### Append the individual address fields to the original dataset and save

In [13]:
# attach the per-address columns to the transactions by matching index labels
df = df.join(df_address, how='left')

In [14]:
# write the combined dataset next to the notebook, without the row index
df.to_csv(path_or_buf='parma_crescent_prices.csv', index=False)

In [15]:
# display the final frame (the rendered output elides the middle rows)
df

Unnamed: 0,address,propertyType,images,hasFloorPlan,location,detailUrl,bedrooms,displayPrice,dateSold,tenure,newBuild,numericPrice,address_1,address_2,number,postcode,prefix
0,"First Floor Flat, 42, Parma Crescent, London, ...",Flat,{'imageUrl': '/spw/images/placeholder/no-image...,False,"{'lat': 51.46221, 'lng': -0.16476}",https://www.rightmove.co.uk/house-prices/detai...,,"£700,000",30 Apr 2024,Leasehold,False,700000,Parma Crescent,London,42,SW11 1LT,First Floor Flat
1,"43, Parma Crescent, London, Greater London SW1...",Terraced,{'imageUrl': 'https://media.rightmove.co.uk/di...,True,"{'lat': 51.46221, 'lng': -0.16476}",https://www.rightmove.co.uk/house-prices/detai...,4.0,"£1,495,000",22 Mar 2024,Freehold,False,1495000,Parma Crescent,London,43,SW11 1LT,
2,"43, Parma Crescent, London, Greater London SW1...",Terraced,{'imageUrl': 'https://media.rightmove.co.uk/di...,True,"{'lat': 51.46221, 'lng': -0.16476}",https://www.rightmove.co.uk/house-prices/detai...,4.0,"£901,000",15 Jun 2007,Freehold,False,901000,Parma Crescent,London,43,SW11 1LT,
3,"43, Parma Crescent, London, Greater London SW1...",Terraced,{'imageUrl': 'https://media.rightmove.co.uk/di...,True,"{'lat': 51.46221, 'lng': -0.16476}",https://www.rightmove.co.uk/house-prices/detai...,4.0,"£550,000",7 Apr 2003,Freehold,False,550000,Parma Crescent,London,43,SW11 1LT,
4,"43, Parma Crescent, London, Greater London SW1...",Terraced,{'imageUrl': 'https://media.rightmove.co.uk/di...,True,"{'lat': 51.46221, 'lng': -0.16476}",https://www.rightmove.co.uk/house-prices/detai...,4.0,"£235,000",20 Apr 1999,Freehold,False,235000,Parma Crescent,London,43,SW11 1LT,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157,"Top Floor Flat, 74, Parma Crescent, London, Gr...",Flat,{'imageUrl': 'https://media.rightmove.co.uk/di...,False,"{'lat': 51.4622, 'lng': -0.16433}",https://www.rightmove.co.uk/house-prices/detai...,3.0,"£275,000",17 Jun 2002,Leasehold,False,275000,Parma Crescent,London,74,SW11 1LU,Top Floor Flat
158,"Top Floor Flat, 74, Parma Crescent, London, Gr...",Flat,{'imageUrl': 'https://media.rightmove.co.uk/di...,False,"{'lat': 51.4622, 'lng': -0.16433}",https://www.rightmove.co.uk/house-prices/detai...,3.0,"£124,500",15 Nov 1996,Leasehold,False,124500,Parma Crescent,London,74,SW11 1LU,Top Floor Flat
159,"69, Parma Crescent, London, Greater London SW1...",Terraced,{'imageUrl': '/spw/images/placeholder/no-image...,False,"{'lat': 51.4622, 'lng': -0.16433}",https://www.rightmove.co.uk/house-prices/detai...,,"£425,000",10 May 2001,Freehold,False,425000,Parma Crescent,London,69,SW11 1LU,
160,"71, Parma Crescent, London, Greater London SW1...",Terraced,{'imageUrl': 'https://media.rightmove.co.uk/di...,False,"{'lat': 51.4622, 'lng': -0.16433}",https://www.rightmove.co.uk/house-prices/detai...,4.0,"£235,000",8 May 1998,Freehold,False,235000,Parma Crescent,London,71,SW11 1LU,
