In [1]:
import requests
import pandas as pd
import time
import sqlite3

# Seminar - APIs, DBs and Live coding

## Task 1: Requesting API
### 1a. Create a function requesting data from sreality

```python
base_url = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb=1&category_type_cb=1&locality_region_id=10&per_page60&page={}'.format(i)

r = requests.get(base_url)
d = r.json()
```

* function should parametrize: 
    * `category_main_cb` - `{'flat':1, 'house':2, 'land':3 }`
    * `category_type_cb` - `{'sell':1,'rent':2}`
    * `locality_region_id` - use 10 as default value
    * `page` parameter
* use string inputs for `category_main_cb` and `category_type_cb`
* include try/except clause to handle errors
* function should return JSON data in python types
* do not forget to sleep each request at least 0.5s

In [2]:
def request_sreality(page, category_main='flat', category_type='sell', locality_region_id=10):
    time.sleep(0.5)
    category_mains = {'flat':1, 'house':2, 'land':3 }
    category_types = {'sell':1,'rent':2}
    url_template = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb={category_main_cb}&category_type_cb={category_type_cb}&locality_region_id={locality_region_id}&per_page60&page={page}'
    
    try:
        url = url_template.format(
            category_main_cb=category_mains[category_main],
            category_type_cb=category_types[category_type],
            locality_region_id=locality_region_id,
            page=page
        )

        r = requests.get(url)

        return r.json()
    except Exception as e:
        print(e)
d = request_sreality(0)
d.keys()

dict_keys(['meta_description', 'result_size', '_embedded', 'filterLabels', 'title', 'filter', '_links', 'locality', 'locality_dativ', 'logged_in', 'per_page', 'category_instrumental', 'page', 'filterLabels2'])

### 1b. Create a function converting sreality json data into pandas dataframe

In [3]:
def sreality_json_to_df(sreality_data):
    return pd.DataFrame(sreality_data['_embedded']['estates'])
sreality_json_to_df(d)

Unnamed: 0,labelsReleased,has_panorama,labels,is_auction,labelsAll,seo,exclusively_at_rk,category,has_floor_plan,_embedded,...,hash_id,attractive_offer,price,price_czk,_links,rus,name,region_tip,gps,has_matterport_url
0,"[[loggia, panel, parking_lots], []]",0,"[Lodžie, Panelová, Parkování]",False,"[[personal, after_reconstruction, loggia, pane...","{'category_main_cb': 1, 'category_sub_cb': 7, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,1695245900,0,5104000,"{'value_raw': 5104000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+1 59 m²,2041904,"{'lat': 50.01708241707067, 'lon': 14.464781582...",False
1,"[[], []]",0,[],False,"[[new_building, personal, terrace, brick, cell...","{'category_main_cb': 1, 'category_sub_cb': 6, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,859952716,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+kk 121 m²,0,"{'lat': 50.084148417070665, 'lon': 14.46689458...",False
2,"[[], [medic]]",0,[Lékař 3 min. pěšky],False,"[[new_building, personal, balcony, brick, cell...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2555258700,0,19532000,"{'value_raw': 19532000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 69 m²,0,"{'lat': 50.067760417070666, 'lon': 14.42525858...",False
3,"[[], []]",0,[],False,"[[personal, after_reconstruction, brick, eleva...","{'category_main_cb': 1, 'category_sub_cb': 8, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,3188635468,0,16931000,"{'value_raw': 16931000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 4+kk 91 m²,0,"{'lat': 50.066925417070664, 'lon': 14.44659058...",False
4,"[[], []]",0,[],False,"[[personal, after_reconstruction, brick, cella...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,3556099916,0,7668000,"{'value_raw': 7668000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 49 m²,0,"{'lat': 50.10141041707067, 'lon': 14.508370582...",False
5,"[[], [train, medic]]",0,"[Vlak 7 min. pěšky, Lékař 7 min. pěšky]",False,"[[new_building, personal, brick, elevator, gar...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,3741402700,0,6397000,"{'value_raw': 6397000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 51 m²,0,"{'lat': 50.10141041707067, 'lon': 14.508370582...",False
6,"[[], []]",0,[],False,"[[new_building, personal, balcony, terrace, ce...","{'category_main_cb': 1, 'category_sub_cb': 6, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,693262684,0,11514000,"{'value_raw': 11514000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+kk 75 m²,0,"{'lat': 50.001545417070666, 'lon': 14.41546158...",False
7,"[[], [medic]]",0,[Lékař 5 min. pěšky],False,"[[personal, after_reconstruction, brick, eleva...","{'category_main_cb': 1, 'category_sub_cb': 7, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,816449884,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+1 104 m²,0,"{'lat': 50.06960341707067, 'lon': 14.426915582...",False
8,"[[], []]",0,[],False,"[[new_building, personal, terrace, elevator, g...","{'category_main_cb': 1, 'category_sub_cb': 6, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,1077777996,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+kk 127 m² (Mezonet),0,"{'lat': 50.09260941707067, 'lon': 14.408651582...",False
9,"[[parking_lots, furnished], []]",0,"[Parkování, Vybavený]",False,"[[personal, after_reconstruction, balcony, bri...","{'category_main_cb': 1, 'category_sub_cb': 11,...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,1599015004,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 5+1 196 m²,0,"{'lat': 50.066925417070664, 'lon': 14.44659058...",False


### 1c. link function `1b` into function `1a`

In [4]:
def request_sreality(page, category_main='flat', category_type='sell', locality_region_id=10):
    time.sleep(0.5)
    category_mains = {'flat':1, 'house':2, 'land':3 }
    category_types = {'sell':1,'rent':2}
    url_template = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb={category_main_cb}&category_type_cb={category_type_cb}&locality_region_id={locality_region_id}&per_page60&page={page}'
    
    try:
        url = url_template.format(
            category_main_cb=category_mains[category_main],
            category_type_cb=category_types[category_type],
            locality_region_id=locality_region_id,
            page=page
        )

        r = requests.get(url)

        return sreality_json_to_df(r.json())
    except Exception as e:
        print(e)
df = request_sreality(0)
df

Unnamed: 0,labelsReleased,has_panorama,labels,is_auction,labelsAll,seo,exclusively_at_rk,category,has_floor_plan,_embedded,...,hash_id,attractive_offer,price,price_czk,_links,rus,name,region_tip,gps,has_matterport_url
0,"[[parking_lots, partly_furnished], []]",0,"[Parkování, Částečně vybavený]",False,"[[personal, balcony, brick, cellar, parking_lo...","{'category_main_cb': 1, 'category_sub_cb': 5, ...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,3874371404,0,5714000,"{'value_raw': 5714000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+1 47 m²,2041904,"{'lat': 50.083824417070666, 'lon': 14.46469558...",False
1,"[[], []]",0,[],False,"[[new_building, personal, terrace, brick, cell...","{'category_main_cb': 1, 'category_sub_cb': 6, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,859952716,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+kk 121 m²,0,"{'lat': 50.084148417070665, 'lon': 14.46689458...",False
2,"[[], [medic]]",0,[Lékař 3 min. pěšky],False,"[[new_building, personal, balcony, brick, cell...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2555258700,0,19532000,"{'value_raw': 19532000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 69 m²,0,"{'lat': 50.067760417070666, 'lon': 14.42525858...",False
3,"[[], []]",0,[],False,"[[personal, after_reconstruction, brick, eleva...","{'category_main_cb': 1, 'category_sub_cb': 8, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,3188635468,0,16931000,"{'value_raw': 16931000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 4+kk 91 m²,0,"{'lat': 50.066925417070664, 'lon': 14.44659058...",False
4,"[[], []]",0,[],False,"[[personal, after_reconstruction, brick, cella...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,3556099916,0,7668000,"{'value_raw': 7668000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 49 m²,0,"{'lat': 50.10141041707067, 'lon': 14.508370582...",False
5,"[[], [train, medic]]",0,"[Vlak 7 min. pěšky, Lékař 7 min. pěšky]",False,"[[new_building, personal, brick, elevator, gar...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,3741402700,0,6397000,"{'value_raw': 6397000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 51 m²,0,"{'lat': 50.10141041707067, 'lon': 14.508370582...",False
6,"[[], []]",0,[],False,"[[new_building, personal, balcony, terrace, ce...","{'category_main_cb': 1, 'category_sub_cb': 6, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,693262684,0,11514000,"{'value_raw': 11514000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+kk 75 m²,0,"{'lat': 50.001545417070666, 'lon': 14.41546158...",False
7,"[[], [medic]]",0,[Lékař 5 min. pěšky],False,"[[personal, after_reconstruction, brick, eleva...","{'category_main_cb': 1, 'category_sub_cb': 7, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,816449884,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+1 104 m²,0,"{'lat': 50.06960341707067, 'lon': 14.426915582...",False
8,"[[], []]",0,[],False,"[[new_building, personal, terrace, elevator, g...","{'category_main_cb': 1, 'category_sub_cb': 6, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,1077777996,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+kk 127 m² (Mezonet),0,"{'lat': 50.09260941707067, 'lon': 14.408651582...",False
9,"[[parking_lots, furnished], []]",0,"[Parkování, Vybavený]",False,"[[personal, after_reconstruction, balcony, bri...","{'category_main_cb': 1, 'category_sub_cb': 11,...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,1599015004,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 5+1 196 m²,0,"{'lat': 50.066925417070664, 'lon': 14.44659058...",False


### 1c. Combining multiple requests into single df

* Function should parametrize:
    * `start_page` and `end_page`
    * request parameters
* construct a list of individual request dfs
* then feed it into `pd.concat` function

In [5]:
def multiple_sreality_requests(start_page,end_page,category_main='flat', category_type='sell', locality_region_id=10):
    return pd.concat([
        request_sreality(i, category_main=category_main, category_type=category_type, locality_region_id=locality_region_id) 
        for i in range(start_page,end_page+1)
    ])

raw = multiple_sreality_requests(0,4)
raw.head()

Unnamed: 0,labelsReleased,has_panorama,labels,is_auction,labelsAll,seo,exclusively_at_rk,category,has_floor_plan,_embedded,...,hash_id,attractive_offer,price,price_czk,_links,rus,name,region_tip,gps,has_matterport_url
0,"[[loggia, panel, parking_lots], []]",0,"[Lodžie, Panelová, Parkování]",False,"[[personal, after_reconstruction, loggia, pane...","{'category_main_cb': 1, 'category_sub_cb': 7, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,1695245900,0,5066000,"{'value_raw': 5066000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+1 59 m²,2041904,"{'lat': 50.01708241707067, 'lon': 14.464781582...",False
1,"[[], []]",0,[],False,"[[new_building, personal, terrace, brick, cell...","{'category_main_cb': 1, 'category_sub_cb': 6, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,859952716,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+kk 121 m²,0,"{'lat': 50.084148417070665, 'lon': 14.46689458...",False
2,"[[], [medic]]",0,[Lékař 3 min. pěšky],False,"[[new_building, personal, balcony, brick, cell...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2555258700,0,19532000,"{'value_raw': 19532000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 69 m²,0,"{'lat': 50.067760417070666, 'lon': 14.42525858...",False
3,"[[], []]",0,[],False,"[[personal, after_reconstruction, brick, eleva...","{'category_main_cb': 1, 'category_sub_cb': 8, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,3188635468,0,16931000,"{'value_raw': 16931000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 4+kk 91 m²,0,"{'lat': 50.066925417070664, 'lon': 14.44659058...",False
4,"[[], []]",0,[],False,"[[personal, after_reconstruction, brick, cella...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,3556099916,0,7668000,"{'value_raw': 7668000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 49 m²,0,"{'lat': 50.10141041707067, 'lon': 14.508370582...",False


## Task 2: Cleaning data

### 2a. Filter columns
* filter only columns: `['locality', 'price', 'name', 'gps','hash_id','labelsAll','exclusively_at_rk']`
* use `.copy()` to avoid `SettingWithCopyWarning` later


In [6]:
clean = raw[['locality', 'price', 'name', 'gps','hash_id','labelsAll','exclusively_at_rk']].copy()
clean.head()

Unnamed: 0,locality,price,name,gps,hash_id,labelsAll,exclusively_at_rk
0,Praha 4 - Krč,5066000,Prodej bytu 3+1 59 m²,"{'lat': 50.01708241707067, 'lon': 14.464781582...",1695245900,"[[personal, after_reconstruction, loggia, pane...",0
1,Praha 8 - Karlín,0,Prodej bytu 3+kk 121 m²,"{'lat': 50.084148417070665, 'lon': 14.46689458...",859952716,"[[new_building, personal, terrace, brick, cell...",1
2,Praha 1 - Nové Město,19532000,Prodej bytu 2+kk 69 m²,"{'lat': 50.067760417070666, 'lon': 14.42525858...",2555258700,"[[new_building, personal, balcony, brick, cell...",0
3,Praha 2 - Vinohrady,16931000,Prodej bytu 4+kk 91 m²,"{'lat': 50.066925417070664, 'lon': 14.44659058...",3188635468,"[[personal, after_reconstruction, brick, eleva...",0
4,Praha 9 - Vysočany,7668000,Prodej bytu 2+kk 49 m²,"{'lat': 50.10141041707067, 'lon': 14.508370582...",3556099916,"[[personal, after_reconstruction, brick, cella...",0


### 2b: GPS
* Convert dictionary in `gps` column into two columns - `lat` and `lon`
* use apply function on gps column
* Note apply can return multiple columns

In [7]:
clean[['lat','lon']] = clean.gps.apply(lambda gps: pd.Series({'lat':gps['lat'], 'lon':gps['lon']}))

clean.head()

Unnamed: 0,locality,price,name,gps,hash_id,labelsAll,exclusively_at_rk,lat,lon
0,Praha 4 - Krč,5066000,Prodej bytu 3+1 59 m²,"{'lat': 50.01708241707067, 'lon': 14.464781582...",1695245900,"[[personal, after_reconstruction, loggia, pane...",0,50.017082,14.464782
1,Praha 8 - Karlín,0,Prodej bytu 3+kk 121 m²,"{'lat': 50.084148417070665, 'lon': 14.46689458...",859952716,"[[new_building, personal, terrace, brick, cell...",1,50.084148,14.466895
2,Praha 1 - Nové Město,19532000,Prodej bytu 2+kk 69 m²,"{'lat': 50.067760417070666, 'lon': 14.42525858...",2555258700,"[[new_building, personal, balcony, brick, cell...",0,50.06776,14.425259
3,Praha 2 - Vinohrady,16931000,Prodej bytu 4+kk 91 m²,"{'lat': 50.066925417070664, 'lon': 14.44659058...",3188635468,"[[personal, after_reconstruction, brick, eleva...",0,50.066925,14.446591
4,Praha 9 - Vysočany,7668000,Prodej bytu 2+kk 49 m²,"{'lat': 50.10141041707067, 'lon': 14.508370582...",3556099916,"[[personal, after_reconstruction, brick, cella...",0,50.10141,14.508371


### 2b. Get flat type from name
* Name is always represented by string `Prodej bytu [type of flat] [Area] m^2`
* try picking third word in string
* check meaningfulness using `.value_counts()`

In [8]:
clean['flat_type'] = clean.name.apply(lambda nm: nm.split()[2])
clean

Unnamed: 0,locality,price,name,gps,hash_id,labelsAll,exclusively_at_rk,lat,lon,flat_type
0,Praha 4 - Krč,5066000,Prodej bytu 3+1 59 m²,"{'lat': 50.01708241707067, 'lon': 14.464781582...",1695245900,"[[personal, after_reconstruction, loggia, pane...",0,50.017082,14.464782,3+1
1,Praha 8 - Karlín,0,Prodej bytu 3+kk 121 m²,"{'lat': 50.084148417070665, 'lon': 14.46689458...",859952716,"[[new_building, personal, terrace, brick, cell...",1,50.084148,14.466895,3+kk
2,Praha 1 - Nové Město,19532000,Prodej bytu 2+kk 69 m²,"{'lat': 50.067760417070666, 'lon': 14.42525858...",2555258700,"[[new_building, personal, balcony, brick, cell...",0,50.067760,14.425259,2+kk
3,Praha 2 - Vinohrady,16931000,Prodej bytu 4+kk 91 m²,"{'lat': 50.066925417070664, 'lon': 14.44659058...",3188635468,"[[personal, after_reconstruction, brick, eleva...",0,50.066925,14.446591,4+kk
4,Praha 9 - Vysočany,7668000,Prodej bytu 2+kk 49 m²,"{'lat': 50.10141041707067, 'lon': 14.508370582...",3556099916,"[[personal, after_reconstruction, brick, cella...",0,50.101410,14.508371,2+kk
...,...,...,...,...,...,...,...,...,...,...
16,Praha 6 - Břevnov,0,Prodej bytu 3+1 62 m²,"{'lat': 50.07464941707067, 'lon': 14.366944582...",1373627980,"[[personal, after_reconstruction, loggia, bric...",1,50.074649,14.366945,3+1
17,Praha 1 - Nové Město,24309000,Prodej bytu 5+kk 135 m²,"{'lat': 50.067760417070666, 'lon': 14.42525858...",2023106380,"[[personal, terrace, brick, elevator], [playgr...",0,50.067760,14.425259,5+kk
18,Praha 6 - Řepy,8230000,Prodej bytu 4+1 81 m²,"{'lat': 50.05617541707066, 'lon': 14.323144582...",3490281036,"[[personal, balcony, panel, cellar, elevator],...",0,50.056175,14.323145,4+1
19,Praha 10 - Malešice,9365000,Prodej bytu 3+kk 86 m²,"{'lat': 50.07307041707067, 'lon': 14.503517582...",766834252,"[[personal, brick, cellar], [small_shop, taver...",0,50.073070,14.503518,3+kk


### 2c. Get area from name
* Naive: select the word before last word
* Then try navigating using the index of `'m²'`
* if this also fail, then you will need to use regex

In [9]:
def name_to_area(nm):
    splitted= nm.split()
    
    m2_idx = splitted.index('m²')
    
    return int(splitted[m2_idx-1])

clean['area'] = clean.name.apply(name_to_area)
clean

Unnamed: 0,locality,price,name,gps,hash_id,labelsAll,exclusively_at_rk,lat,lon,flat_type,area
0,Praha 4 - Krč,5066000,Prodej bytu 3+1 59 m²,"{'lat': 50.01708241707067, 'lon': 14.464781582...",1695245900,"[[personal, after_reconstruction, loggia, pane...",0,50.017082,14.464782,3+1,59
1,Praha 8 - Karlín,0,Prodej bytu 3+kk 121 m²,"{'lat': 50.084148417070665, 'lon': 14.46689458...",859952716,"[[new_building, personal, terrace, brick, cell...",1,50.084148,14.466895,3+kk,121
2,Praha 1 - Nové Město,19532000,Prodej bytu 2+kk 69 m²,"{'lat': 50.067760417070666, 'lon': 14.42525858...",2555258700,"[[new_building, personal, balcony, brick, cell...",0,50.067760,14.425259,2+kk,69
3,Praha 2 - Vinohrady,16931000,Prodej bytu 4+kk 91 m²,"{'lat': 50.066925417070664, 'lon': 14.44659058...",3188635468,"[[personal, after_reconstruction, brick, eleva...",0,50.066925,14.446591,4+kk,91
4,Praha 9 - Vysočany,7668000,Prodej bytu 2+kk 49 m²,"{'lat': 50.10141041707067, 'lon': 14.508370582...",3556099916,"[[personal, after_reconstruction, brick, cella...",0,50.101410,14.508371,2+kk,49
...,...,...,...,...,...,...,...,...,...,...,...
16,Praha 6 - Břevnov,0,Prodej bytu 3+1 62 m²,"{'lat': 50.07464941707067, 'lon': 14.366944582...",1373627980,"[[personal, after_reconstruction, loggia, bric...",1,50.074649,14.366945,3+1,62
17,Praha 1 - Nové Město,24309000,Prodej bytu 5+kk 135 m²,"{'lat': 50.067760417070666, 'lon': 14.42525858...",2023106380,"[[personal, terrace, brick, elevator], [playgr...",0,50.067760,14.425259,5+kk,135
18,Praha 6 - Řepy,8230000,Prodej bytu 4+1 81 m²,"{'lat': 50.05617541707066, 'lon': 14.323144582...",3490281036,"[[personal, balcony, panel, cellar, elevator],...",0,50.056175,14.323145,4+1,81
19,Praha 10 - Malešice,9365000,Prodej bytu 3+kk 86 m²,"{'lat': 50.07307041707067, 'lon': 14.503517582...",766834252,"[[personal, brick, cellar], [small_shop, taver...",0,50.073070,14.503518,3+kk,86


## Task 3: Persist to sqlite

In [10]:
con = sqlite3.connect('sreality.sqlite')
df.to_sql(name = 'estates', con= con, index = False)

OperationalError: database is locked

## Task 4 (Homework): Convert `labelsAll` into categorical variables

### Task 4a. Get all possible label names
* deal with nested-list structure
* Hint: try to sum the whole column
* Needed to Iterate through all labels in all rows and 

In [11]:
possible_labels = list(set([item for sublist in raw.labelsAll.sum() for item in sublist]))
possible_labels

['loggia',
 'playground',
 'movies',
 'candy_shop',
 'natural_attraction',
 'parking_lots',
 'restaurant',
 'personal',
 'partly_furnished',
 'collective',
 'in_construction',
 'train',
 'balcony',
 'furnished',
 'drugstore',
 'medic',
 'atm',
 'vet',
 'panel',
 'after_reconstruction',
 'elevator',
 'tavern',
 'theater',
 'terrace',
 'school',
 'not_furnished',
 'new_building',
 'tram',
 'small_shop',
 'post_office',
 'kindergarten',
 'sports',
 'bus_public_transport',
 'brick',
 'metro',
 'shop',
 'garage',
 'cellar']

### 4b. Test existence of label `cellar` for offers
* again deal with nested list of list structure
* write generic function `test_existence_of_label(offer_labels,label)`

In [12]:
def test_existence_of_label(offer_labels,label):
    return label in [item for sublist in offer_labels for item in sublist]
raw.labelsAll.apply(lambda offer_labels: test_existence_of_label(offer_labels, 'cellar'))

0      True
1      True
2      True
3     False
4      True
      ...  
16    False
17    False
18     True
19     True
20     True
Name: labelsAll, Length: 105, dtype: bool

### 4c. Test existence of all possible labels
* use apply returning series with all labels

In [13]:
def existence_of_all_labels(offer_labels, possible_labels):
    return pd.Series({
        label:test_existence_of_label(offer_labels,label)
        for label in possible_labels
    })

raw.labelsAll.apply(lambda offer_labels: existence_of_all_labels(offer_labels, possible_labels))

Unnamed: 0,loggia,playground,movies,candy_shop,natural_attraction,parking_lots,restaurant,personal,partly_furnished,collective,...,small_shop,post_office,kindergarten,sports,bus_public_transport,brick,metro,shop,garage,cellar
0,True,True,True,True,True,True,True,True,True,False,...,True,True,True,True,True,False,True,True,False,True
1,False,True,True,True,False,False,True,True,False,False,...,True,True,True,True,True,True,True,True,True,True
2,False,True,True,True,True,False,True,True,False,False,...,True,True,True,True,True,True,True,True,True,True
3,False,True,True,True,False,False,True,True,False,False,...,True,True,True,True,True,True,True,True,False,False
4,False,True,True,True,True,False,True,True,False,False,...,True,True,True,True,True,True,True,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16,True,True,True,True,True,False,True,True,True,False,...,True,True,True,True,True,True,True,True,False,False
17,False,True,True,True,False,False,True,True,False,False,...,True,True,True,True,True,True,True,True,False,False
18,False,True,True,True,True,False,True,True,False,False,...,True,True,True,True,True,False,True,True,False,True
19,False,True,True,True,True,False,True,True,False,False,...,True,True,True,True,True,True,True,True,False,True
