In [1]:
import requests
import pandas as pd
import time

# Seminar - APIs and real-life coding

## Task 1: Requesting API
### 1a. Create a function requesting data from sreality

```python
base_url = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb=1&category_type_cb=1&locality_region_id=10&per_page60&page={}'.format(i)

r = requests.get(base_url)
d = r.json()
```

* function should parametrize: 
    * `category_main_cb` - `{'flat':1, 'house':2, 'land':3 }`
    * `category_type_cb` - `{'sell':1,'rent':2}`
    * `locality_region_id` - `{'Praha':10,'Brno':14}`
    * `page` parameter
* use string inputs for `category_main_cb` and `category_type_cb`
* test the validity of inputs
* include try/except clause to handle errors
* function should return JSON data in python types
* do not forget to sleep each request at least 0.5s

In [2]:
def request_sreality(page, category_main='flat', category_type='sell', locality_region='Praha'):
    
    time.sleep(0.5)

    category_mains = {'flat':1, 'house':2, 'land':3 }
    category_types = {'sell':1,'rent':2}
    region_mapping = {'Praha':10, 'Brno':14}
    
    if category_main not in category_mains:
        raise Exception(f'Unknown category main {category_main}')
    
    if category_type not in category_types:
        raise Exception(f'Unknown category type {category_type}')
    
    if locality_region not in region_mapping:
        raise Exception(f'Unknown locality region {locality_region}')
    
    url_template = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb={category_main_cb}&category_type_cb={category_type_cb}&locality_region_id={locality_region_id}&per_page60&page={page}'
    
    try:
        url = url_template.format(
            category_main_cb=category_mains[category_main],
            category_type_cb=category_types[category_type],
            locality_region_id=region_mapping[locality_region],
            page=page
        )

        r = requests.get(url)

        return r.json()
    except Exception as e:
        print(e)
d = request_sreality(0)
d.keys()

dict_keys(['meta_description', 'result_size', '_embedded', 'filterLabels', 'title', 'filter', '_links', 'locality', 'locality_dativ', 'logged_in', 'per_page', 'category_instrumental', 'page', 'filterLabels2'])

### 1b. Create a function converting sreality json data into pandas dataframe

In [3]:
def sreality_json_to_df(sreality_data):
    return pd.DataFrame(sreality_data['_embedded']['estates'])
sreality_json_to_df(d)

Unnamed: 0,labelsReleased,has_panorama,labels,is_auction,labelsAll,seo,exclusively_at_rk,category,has_floor_plan,_embedded,...,hash_id,attractive_offer,price,price_czk,_links,rus,name,region_tip,gps,has_matterport_url
0,"[[], []]",0,[],False,"[[new_building, personal, balcony, terrace, ce...","{'category_main_cb': 1, 'category_sub_cb': 6, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2485658700,0,22928000,"{'value_raw': 22928000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+kk 156 m²,2444145,"{'lat': 50.0714224276386, 'lon': 14.4594775723...",False
1,"[[after_reconstruction, loggia, panel], []]",0,"[Po rekonstrukci, Lodžie, Panelová]",False,"[[personal, after_reconstruction, loggia, pane...","{'category_main_cb': 1, 'category_sub_cb': 7, ...",1,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,37303372,0,9660000,"{'value_raw': 9660000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+1 95 m²,0,"{'lat': 50.118175427638604, 'lon': 14.49070557...",False
2,"[[panel], [shop, post_office]]",0,"[Panelová, Obchod 5 min. pěšky, Pošta 2 min. p...",False,"[[personal, panel, furnished], [playground, sm...","{'category_main_cb': 1, 'category_sub_cb': 2, ...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,2556937292,0,3856000,"{'value_raw': 3856000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 1+kk 34 m²,0,"{'lat': 50.043525427638606, 'lon': 14.51326157...",False
3,"[[], []]",0,[],False,"[[new_building, personal, terrace, brick, cell...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",1,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,3499653212,0,8757000,"{'value_raw': 8757000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 59 m²,0,"{'lat': 50.074871427638605, 'lon': 14.49115157...",False
4,"[[], [metro, shop]]",0,"[Metro 2 min. pěšky, Obchod 3 min. pěšky]",False,"[[personal, balcony, brick], [candy_shop, vet,...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,1991259212,0,11930000,"{'value_raw': 11930000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 88 m²,0,"{'lat': 50.064315427638604, 'lon': 14.47253557...",True
5,"[[], []]",0,[],False,"[[new_building, personal, elevator, garage], [...","{'category_main_cb': 1, 'category_sub_cb': 6, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2786600012,0,13284000,"{'value_raw': 13284000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+kk 83 m²,0,"{'lat': 50.0190654276386, 'lon': 14.3932715723...",True
6,"[[collective], []]",0,[Družstevní],False,"[[new_building, collective, balcony, brick, pa...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,742011980,0,8926000,"{'value_raw': 8926000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 74 m²,0,"{'lat': 50.0777624276386, 'lon': 14.3112545723...",False
7,"[[], []]",0,[],False,"[[new_building, personal, terrace, brick, park...","{'category_main_cb': 1, 'category_sub_cb': 8, ...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,2292900940,0,16057000,"{'value_raw': 16057000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 4+kk 141 m²,0,"{'lat': 50.1519084276386, 'lon': 14.4968385723...",False
8,"[[], []]",0,[],False,"[[new_building, personal, balcony, brick, elev...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",1,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,2077946956,0,12381000,"{'value_raw': 12381000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 77 m²,0,"{'lat': 50.065643427638605, 'lon': 14.50016557...",False
9,"[[], []]",0,[],False,"[[personal, balcony, terrace, brick, cellar, e...","{'category_main_cb': 1, 'category_sub_cb': 8, ...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,2550781004,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 4+kk 296 m² (Mezonet),0,"{'lat': 50.070171427638606, 'lon': 14.45639857...",False


### 1c. link function `1b` into function `1a`

In [4]:
def request_sreality(page, category_main='flat', category_type='sell', locality_region='Praha'):
    
    time.sleep(0.5)

    category_mains = {'flat':1, 'house':2, 'land':3 }
    category_types = {'sell':1,'rent':2}
    region_mapping = {'Praha':10, 'Brno':14}
    
    if category_main not in category_mains:
        raise Exception(f'Unknown category main {category_main}')
    
    if category_type not in category_types:
        raise Exception(f'Unknown category type {category_type}')
    
    if locality_region not in region_mapping:
        raise Exception(f'Unknown locality region {locality_region}')
    
    url_template = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb={category_main_cb}&category_type_cb={category_type_cb}&locality_region_id={locality_region_id}&per_page60&page={page}'
    
    try:
        url = url_template.format(
            category_main_cb=category_mains[category_main],
            category_type_cb=category_types[category_type],
            locality_region_id=region_mapping[locality_region],
            page=page
        )

        r = requests.get(url)

        return sreality_json_to_df(r.json())    
    except Exception as e:
        print(e)
d = request_sreality(0)
d.keys()

Unnamed: 0,labelsReleased,has_panorama,labels,is_auction,labelsAll,seo,exclusively_at_rk,category,has_floor_plan,_embedded,...,hash_id,attractive_offer,price,price_czk,_links,rus,name,region_tip,gps,has_matterport_url
0,"[[], []]",0,[],False,"[[personal, balcony, terrace, cellar, elevator...","{'category_main_cb': 1, 'category_sub_cb': 8, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2720790348,0,21925000,"{'value_raw': 21925000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 4+kk 180 m²,2493152,"{'lat': 50.1043584276386, 'lon': 14.4615885723...",False
1,"[[after_reconstruction, loggia, panel], []]",0,"[Po rekonstrukci, Lodžie, Panelová]",False,"[[personal, after_reconstruction, loggia, pane...","{'category_main_cb': 1, 'category_sub_cb': 7, ...",1,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,37303372,0,9660000,"{'value_raw': 9660000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+1 95 m²,0,"{'lat': 50.118175427638604, 'lon': 14.49070557...",False
2,"[[panel], [shop, post_office]]",0,"[Panelová, Obchod 5 min. pěšky, Pošta 2 min. p...",False,"[[personal, panel, furnished], [playground, sm...","{'category_main_cb': 1, 'category_sub_cb': 2, ...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,2556937292,0,3856000,"{'value_raw': 3856000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 1+kk 34 m²,0,"{'lat': 50.043525427638606, 'lon': 14.51326157...",False
3,"[[], []]",0,[],False,"[[new_building, personal, terrace, brick, cell...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",1,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,3499653212,0,8757000,"{'value_raw': 8757000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 59 m²,0,"{'lat': 50.074871427638605, 'lon': 14.49115157...",False
4,"[[], [metro, shop]]",0,"[Metro 2 min. pěšky, Obchod 3 min. pěšky]",False,"[[personal, balcony, brick], [candy_shop, vet,...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,1991259212,0,11930000,"{'value_raw': 11930000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 88 m²,0,"{'lat': 50.064315427638604, 'lon': 14.47253557...",True
5,"[[], []]",0,[],False,"[[new_building, personal, elevator, garage], [...","{'category_main_cb': 1, 'category_sub_cb': 6, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2786600012,0,13284000,"{'value_raw': 13284000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+kk 83 m²,0,"{'lat': 50.0190654276386, 'lon': 14.3932715723...",True
6,"[[collective], []]",0,[Družstevní],False,"[[new_building, collective, balcony, brick, pa...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,742011980,0,8926000,"{'value_raw': 8926000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 74 m²,0,"{'lat': 50.0777624276386, 'lon': 14.3112545723...",False
7,"[[], []]",0,[],False,"[[new_building, personal, terrace, brick, park...","{'category_main_cb': 1, 'category_sub_cb': 8, ...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,2292900940,0,16057000,"{'value_raw': 16057000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 4+kk 141 m²,0,"{'lat': 50.1519084276386, 'lon': 14.4968385723...",False
8,"[[], []]",0,[],False,"[[new_building, personal, balcony, brick, elev...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",1,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,2077946956,0,12381000,"{'value_raw': 12381000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 77 m²,0,"{'lat': 50.065643427638605, 'lon': 14.50016557...",False
9,"[[], []]",0,[],False,"[[personal, balcony, terrace, brick, cellar, e...","{'category_main_cb': 1, 'category_sub_cb': 8, ...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,2550781004,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 4+kk 296 m² (Mezonet),0,"{'lat': 50.070171427638606, 'lon': 14.45639857...",False


### 1c. Combining multiple requests into single df

* Function should parametrize:
    * `start_page` and `end_page`
    * request parameters
* construct a list of individual request dfs
* then feed it into `pd.concat` function

In [5]:
def multiple_sreality_requests(start_page,end_page,category_main='flat', category_type='sell', locality_region='Praha'):
    return pd.concat([
        request_sreality(i, category_main=category_main, category_type=category_type, locality_region=locality_region) 
        for i in range(start_page,end_page+1)
    ])

raw = multiple_sreality_requests(0,4)
raw.head()

Unnamed: 0,labelsReleased,has_panorama,labels,is_auction,labelsAll,seo,exclusively_at_rk,category,has_floor_plan,_embedded,...,hash_id,attractive_offer,price,price_czk,_links,rus,name,region_tip,gps,has_matterport_url
0,"[[], [post_office]]",0,[Pošta 8 min. pěšky],False,"[[new_building, personal, balcony, cellar, ele...","{'category_main_cb': 1, 'category_sub_cb': 2, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,891823180,0,8730000,"{'value_raw': 8730000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 1+kk 56 m²,2444139,"{'lat': 50.07162042763861, 'lon': 14.459427572...",False
1,"[[after_reconstruction, loggia, panel], []]",0,"[Po rekonstrukci, Lodžie, Panelová]",False,"[[personal, after_reconstruction, loggia, pane...","{'category_main_cb': 1, 'category_sub_cb': 7, ...",1,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,37303372,0,9660000,"{'value_raw': 9660000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+1 95 m²,0,"{'lat': 50.118175427638604, 'lon': 14.49070557...",False
2,"[[panel], [shop, post_office]]",0,"[Panelová, Obchod 5 min. pěšky, Pošta 2 min. p...",False,"[[personal, panel, furnished], [playground, sm...","{'category_main_cb': 1, 'category_sub_cb': 2, ...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,2556937292,0,3856000,"{'value_raw': 3856000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 1+kk 34 m²,0,"{'lat': 50.043525427638606, 'lon': 14.51326157...",False
3,"[[], []]",0,[],False,"[[new_building, personal, terrace, brick, cell...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",1,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,3499653212,0,8757000,"{'value_raw': 8757000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 59 m²,0,"{'lat': 50.074871427638605, 'lon': 14.49115157...",False
4,"[[], [metro, shop]]",0,"[Metro 2 min. pěšky, Obchod 3 min. pěšky]",False,"[[personal, balcony, brick], [candy_shop, vet,...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,1991259212,0,11930000,"{'value_raw': 11930000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 88 m²,0,"{'lat': 50.064315427638604, 'lon': 14.47253557...",True


## Task 2: Cleaning data

### 2a. Filter columns
* filter only columns: `['locality', 'price', 'name', 'gps','hash_id','labelsAll','exclusively_at_rk']`
* use `.copy()` to avoid `SettingWithCopyWarning` later


In [6]:
clean = raw[['locality', 'price', 'name', 'gps','hash_id','labelsAll','exclusively_at_rk']].copy()
clean.head()

Unnamed: 0,locality,price,name,gps,hash_id,labelsAll,exclusively_at_rk
0,Praha 3 - Žižkov,8730000,Prodej bytu 1+kk 56 m²,"{'lat': 50.07162042763861, 'lon': 14.459427572...",891823180,"[[new_building, personal, balcony, cellar, ele...",1
1,Praha 8 - Kobylisy,9660000,Prodej bytu 3+1 95 m²,"{'lat': 50.118175427638604, 'lon': 14.49070557...",37303372,"[[personal, after_reconstruction, loggia, pane...",1
2,Praha 10 - Záběhlice,3856000,Prodej bytu 1+kk 34 m²,"{'lat': 50.043525427638606, 'lon': 14.51326157...",2556937292,"[[personal, panel, furnished], [playground, sm...",0
3,Praha 3 - Žižkov,8757000,Prodej bytu 2+kk 59 m²,"{'lat': 50.074871427638605, 'lon': 14.49115157...",3499653212,"[[new_building, personal, terrace, brick, cell...",1
4,Praha 3 - Vinohrady,11930000,Prodej bytu 2+kk 88 m²,"{'lat': 50.064315427638604, 'lon': 14.47253557...",1991259212,"[[personal, balcony, brick], [candy_shop, vet,...",1


### 2b: GPS
* Convert dictionary in `gps` column into two columns - `lat` and `lon`
* use apply function on gps column
* Note apply can return multiple columns

In [7]:
clean[['lat','lon']] = clean.gps.apply(lambda gps: pd.Series({'lat':gps['lat'], 'lon':gps['lon']}))

clean.head()

Unnamed: 0,locality,price,name,gps,hash_id,labelsAll,exclusively_at_rk,lat,lon
0,Praha 3 - Žižkov,8730000,Prodej bytu 1+kk 56 m²,"{'lat': 50.07162042763861, 'lon': 14.459427572...",891823180,"[[new_building, personal, balcony, cellar, ele...",1,50.07162,14.459428
1,Praha 8 - Kobylisy,9660000,Prodej bytu 3+1 95 m²,"{'lat': 50.118175427638604, 'lon': 14.49070557...",37303372,"[[personal, after_reconstruction, loggia, pane...",1,50.118175,14.490706
2,Praha 10 - Záběhlice,3856000,Prodej bytu 1+kk 34 m²,"{'lat': 50.043525427638606, 'lon': 14.51326157...",2556937292,"[[personal, panel, furnished], [playground, sm...",0,50.043525,14.513262
3,Praha 3 - Žižkov,8757000,Prodej bytu 2+kk 59 m²,"{'lat': 50.074871427638605, 'lon': 14.49115157...",3499653212,"[[new_building, personal, terrace, brick, cell...",1,50.074871,14.491152
4,Praha 3 - Vinohrady,11930000,Prodej bytu 2+kk 88 m²,"{'lat': 50.064315427638604, 'lon': 14.47253557...",1991259212,"[[personal, balcony, brick], [candy_shop, vet,...",1,50.064315,14.472536


### 2b. Get flat type from name
* Name is always represented by string `Prodej bytu [type of flat] [Area] m^2`
* try picking third word in string
* check meaningfulness using `.value_counts()`

In [8]:
clean['flat_type'] = clean.name.apply(lambda nm: nm.split()[2])
clean

Unnamed: 0,locality,price,name,gps,hash_id,labelsAll,exclusively_at_rk,lat,lon,flat_type
0,Praha 3 - Žižkov,8730000,Prodej bytu 1+kk 56 m²,"{'lat': 50.07162042763861, 'lon': 14.459427572...",891823180,"[[new_building, personal, balcony, cellar, ele...",1,50.071620,14.459428,1+kk
1,Praha 8 - Kobylisy,9660000,Prodej bytu 3+1 95 m²,"{'lat': 50.118175427638604, 'lon': 14.49070557...",37303372,"[[personal, after_reconstruction, loggia, pane...",1,50.118175,14.490706,3+1
2,Praha 10 - Záběhlice,3856000,Prodej bytu 1+kk 34 m²,"{'lat': 50.043525427638606, 'lon': 14.51326157...",2556937292,"[[personal, panel, furnished], [playground, sm...",0,50.043525,14.513262,1+kk
3,Praha 3 - Žižkov,8757000,Prodej bytu 2+kk 59 m²,"{'lat': 50.074871427638605, 'lon': 14.49115157...",3499653212,"[[new_building, personal, terrace, brick, cell...",1,50.074871,14.491152,2+kk
4,Praha 3 - Vinohrady,11930000,Prodej bytu 2+kk 88 m²,"{'lat': 50.064315427638604, 'lon': 14.47253557...",1991259212,"[[personal, balcony, brick], [candy_shop, vet,...",1,50.064315,14.472536,2+kk
...,...,...,...,...,...,...,...,...,...,...
16,Praha 5 - Smíchov,23860000,Prodej bytu 2+kk 153 m² (Loft),"{'lat': 50.06166042763861, 'lon': 14.410449572...",1242051660,"[[new_building, personal, balcony, loggia, ter...",0,50.061660,14.410450,2+kk
17,Praha 3 - Žižkov,10950000,Prodej bytu 2+kk 88 m²,"{'lat': 50.06988942763861, 'lon': 14.479720572...",2047358028,"[[new_building, personal, cellar, elevator, pa...",0,50.069889,14.479721,2+kk
18,Praha 1 - Nové Město,12123000,Prodej bytu 2+kk 67 m²,"{'lat': 50.07125442763861, 'lon': 14.443971572...",2584228940,"[[personal, brick, cellar, elevator, furnished...",0,50.071254,14.443972,2+kk
19,Praha 9 - Letňany,16753000,Prodej bytu 3+kk 116 m²,"{'lat': 50.119968427638604, 'lon': 14.51837357...",403190860,"[[new_building, personal, balcony, loggia, ter...",0,50.119968,14.518374,3+kk


### 2c. Get area from name
* Naive: select the word before last word
* Then try navigating using the index of `'m²'`
* if this also fail, then you will need to use regex

In [9]:
def name_to_area(nm):
    splitted= nm.split()
    
    m2_idx = splitted.index('m²')
    
    return int(splitted[m2_idx-1])

clean['area'] = clean.name.apply(name_to_area)
clean

Unnamed: 0,locality,price,name,gps,hash_id,labelsAll,exclusively_at_rk,lat,lon,flat_type,area
0,Praha 3 - Žižkov,8730000,Prodej bytu 1+kk 56 m²,"{'lat': 50.07162042763861, 'lon': 14.459427572...",891823180,"[[new_building, personal, balcony, cellar, ele...",1,50.071620,14.459428,1+kk,56
1,Praha 8 - Kobylisy,9660000,Prodej bytu 3+1 95 m²,"{'lat': 50.118175427638604, 'lon': 14.49070557...",37303372,"[[personal, after_reconstruction, loggia, pane...",1,50.118175,14.490706,3+1,95
2,Praha 10 - Záběhlice,3856000,Prodej bytu 1+kk 34 m²,"{'lat': 50.043525427638606, 'lon': 14.51326157...",2556937292,"[[personal, panel, furnished], [playground, sm...",0,50.043525,14.513262,1+kk,34
3,Praha 3 - Žižkov,8757000,Prodej bytu 2+kk 59 m²,"{'lat': 50.074871427638605, 'lon': 14.49115157...",3499653212,"[[new_building, personal, terrace, brick, cell...",1,50.074871,14.491152,2+kk,59
4,Praha 3 - Vinohrady,11930000,Prodej bytu 2+kk 88 m²,"{'lat': 50.064315427638604, 'lon': 14.47253557...",1991259212,"[[personal, balcony, brick], [candy_shop, vet,...",1,50.064315,14.472536,2+kk,88
...,...,...,...,...,...,...,...,...,...,...,...
16,Praha 5 - Smíchov,23860000,Prodej bytu 2+kk 153 m² (Loft),"{'lat': 50.06166042763861, 'lon': 14.410449572...",1242051660,"[[new_building, personal, balcony, loggia, ter...",0,50.061660,14.410450,2+kk,153
17,Praha 3 - Žižkov,10950000,Prodej bytu 2+kk 88 m²,"{'lat': 50.06988942763861, 'lon': 14.479720572...",2047358028,"[[new_building, personal, cellar, elevator, pa...",0,50.069889,14.479721,2+kk,88
18,Praha 1 - Nové Město,12123000,Prodej bytu 2+kk 67 m²,"{'lat': 50.07125442763861, 'lon': 14.443971572...",2584228940,"[[personal, brick, cellar, elevator, furnished...",0,50.071254,14.443972,2+kk,67
19,Praha 9 - Letňany,16753000,Prodej bytu 3+kk 116 m²,"{'lat': 50.119968427638604, 'lon': 14.51837357...",403190860,"[[new_building, personal, balcony, loggia, ter...",0,50.119968,14.518374,3+kk,116


## Task 3 (Homework): Convert column`labelsAll` into boolean variables

### Task 3a. Get all possible label names
* deal with nested-list structure
* Hint: try sum the whole column to get a nested list of lists.
* Then flatten the nested list (2D to 1D)
* Finally keep only unique elements


In [10]:
possible_labels = list(set([item for sublist in raw.labelsAll.sum() for item in sublist]))
possible_labels

['after_reconstruction',
 'new_building',
 'cellar',
 'candy_shop',
 'sports',
 'drugstore',
 'collective',
 'elevator',
 'playground',
 'kindergarten',
 'metro',
 'loggia',
 'sightseeing',
 'movies',
 'school',
 'balcony',
 'in_construction',
 'brick',
 'natural_attraction',
 'not_furnished',
 'bus_public_transport',
 'small_shop',
 'restaurant',
 'panel',
 'tavern',
 'post_office',
 'vet',
 'atm',
 'garage',
 'furnished',
 'terrace',
 'shop',
 'partly_furnished',
 'medic',
 'theater',
 'parking_lots',
 'train',
 'tram',
 'personal']

### 4b. Test existence of label `cellar` for offers
* again deal with nested list of list structure
* write generic function `test_existence_of_label(offer_labels,label)`

In [22]:
def test_existence_of_label(offer_labels,label):
    return label in [item for sublist in offer_labels for item in sublist]
raw.labelsAll.apply(lambda offer_labels: test_existence_of_label(offer_labels, 'cellar'))

0      True
1     False
2     False
3      True
4     False
      ...  
16     True
17     True
18     True
19     True
20     True
Name: labelsAll, Length: 105, dtype: bool

### 4c. Test existence of all possible labels
* use apply returning series with all labels

In [23]:
def existence_of_all_labels(offer_labels, possible_labels):
    return pd.Series({
        label:test_existence_of_label(offer_labels,label)
        for label in possible_labels
    })

raw.labelsAll.apply(lambda offer_labels: existence_of_all_labels(offer_labels, possible_labels))

Unnamed: 0,after_reconstruction,new_building,cellar,candy_shop,sports,drugstore,collective,elevator,playground,kindergarten,...,furnished,terrace,shop,partly_furnished,medic,theater,parking_lots,train,tram,personal
0,False,True,True,True,True,True,False,True,True,True,...,False,False,True,False,True,True,True,True,True,True
1,True,False,False,True,True,True,False,True,True,True,...,True,False,True,False,True,True,False,True,True,True
2,False,False,False,True,True,True,False,False,True,True,...,True,False,True,False,True,True,False,True,True,True
3,False,True,True,True,True,True,False,False,True,True,...,True,True,True,False,True,True,False,True,True,True
4,False,False,False,True,True,True,False,False,True,True,...,False,False,True,False,True,True,False,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16,False,True,True,True,True,True,False,True,True,True,...,True,True,True,False,True,True,True,True,True,True
17,False,True,True,True,True,True,False,True,True,True,...,True,False,True,False,True,True,True,True,True,True
18,False,False,True,True,True,True,False,True,True,True,...,True,False,True,False,True,True,False,True,True,True
19,False,True,True,True,True,True,False,True,True,True,...,True,True,True,False,True,True,True,True,True,True
