# AirBnB Data Transformations Hands On - Py
## By Nolan Hardeman

## Import packages

In [1]:
import pandas as pd
import numpy as np

## Import Data

In [3]:
# Load the test-user records from CSV (relative path) into a DataFrame.
airbnb_test_users = pd.read_csv(
    '../Data/airbnb_test_users.csv',
)

In [4]:
# Preview the first five rows to sanity-check the load.
airbnb_test_users.head(n=5)

Unnamed: 0,id,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser
0,5uwns89zht,7/1/2014,20140700000000.0,,FEMALE,35.0,facebook,0,en,direct,direct,untracked,Moweb,iPhone,Mobile Safari
1,jtl0dijy2j,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,untracked,Moweb,iPhone,Mobile Safari
2,xx0ulgorjt,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,linked,Web,Windows Desktop,Chrome
3,6c6puo6ix0,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,linked,Web,Windows Desktop,IE
4,czqhjk3yfe,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,untracked,Web,Mac Desktop,Safari


In [5]:
# List the column labels. A bare last expression is the notebook idiom for
# showing a value; it renders the same Index repr without a print() wrapper.
airbnb_test_users.columns

Index(['id', 'date_account_created', 'timestamp_first_active',
       'date_first_booking', 'gender', 'age', 'signup_method', 'signup_flow',
       'language', 'affiliate_channel', 'affiliate_provider',
       'first_affiliate_tracked', 'signup_app', 'first_device_type',
       'first_browser'],
      dtype='object')


## What is the average age of those who use each web browser type?

In [6]:
# Average age within each first_browser group. Missing ages are skipped by
# mean(), so a browser whose users all lack an age shows NaN.
(
    airbnb_test_users
    .groupby(by='first_browser')['age']
    .mean()
)

first_browser
-unknown-             41.526894
AOL Explorer          60.000000
Android Browser       42.822148
Apple Mail            26.750000
BlackBerry Browser    34.600000
Chrome                34.591678
Chrome Mobile         45.199058
Chromium              28.833333
CometBird             30.000000
Firefox               37.748143
IBrowse               51.000000
IE                    42.463107
IE Mobile             32.312500
IceWeasel             18.000000
Iron                  35.200000
Maxthon               31.000000
Mobile Firefox        31.800000
Mobile Safari         35.098486
Nintendo Browser            NaN
Opera                 38.444444
Opera Mini                  NaN
Opera Mobile          37.000000
Pale Moon             37.000000
Safari                36.416121
SeaMonkey                   NaN
Silk                  38.120000
SiteKiosk             57.000000
Sogou Explorer        28.000000
UC Browser            28.000000
Yandex.Browser        38.500000
wOSBrowser                

## What is the total signup_flow for each device?

In [7]:
# Sum of the signup_flow values within each first_device_type group.
(
    airbnb_test_users
    .groupby(by='first_device_type')['signup_flow']
    .sum()
)

first_device_type
Android Phone          90521
Android Tablet          5969
Desktop (Other)           50
Mac Desktop             5736
Other/Unknown           4826
SmartPhone (Other)         0
Windows Desktop         3866
iPad                   28900
iPhone                345343
Name: signup_flow, dtype: int64

## Import two more datasets from the hands-on exercise

In [8]:
# Load the sample-submission CSV (relative path) into a DataFrame.
airbnb_sample_submission = pd.read_csv(
    '../Data/airbnb_sample_submission.csv',
)

In [9]:
# Load the users CSV (relative path) into a DataFrame.
airbnb_users = pd.read_csv(
    '../Data/airbnb_users.csv',
)

In [10]:
# Preview the first five rows of the sample submission.
airbnb_sample_submission.head(n=5)

Unnamed: 0,id,country
0,5uwns89zht,NDF
1,jtl0dijy2j,NDF
2,xx0ulgorjt,NDF
3,6c6puo6ix0,NDF
4,czqhjk3yfe,NDF


In [11]:
# Join the test users to the sample submission on their shared 'id' column.
# how='inner' is the pandas default, written out here for clarity.
airbnb = airbnb_test_users.merge(
    airbnb_sample_submission,
    how='inner',
    on='id',
)

In [12]:
# Preview the first five rows of the merged frame.
airbnb.head(n=5)

Unnamed: 0,id,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser,country
0,5uwns89zht,7/1/2014,20140700000000.0,,FEMALE,35.0,facebook,0,en,direct,direct,untracked,Moweb,iPhone,Mobile Safari,NDF
1,jtl0dijy2j,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,untracked,Moweb,iPhone,Mobile Safari,NDF
2,xx0ulgorjt,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,linked,Web,Windows Desktop,Chrome,NDF
3,6c6puo6ix0,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,linked,Web,Windows Desktop,IE,NDF
4,czqhjk3yfe,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,untracked,Web,Mac Desktop,Safari,NDF


In [13]:
# Preview the first five rows of the users frame.
airbnb_users.head(n=5)

Unnamed: 0,id,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser,country_destination
0,gxn3p5htnn,6/28/2010,20090300000000.0,,-unknown-,,facebook,0,en,direct,direct,untracked,Web,Mac Desktop,Chrome,NDF
1,820tgsjxq7,5/25/2011,20090500000000.0,,MALE,38.0,facebook,0,en,seo,google,untracked,Web,Mac Desktop,Chrome,NDF
2,4ft3gnwmtx,9/28/2010,20090600000000.0,8/2/2010,FEMALE,56.0,basic,3,en,direct,direct,untracked,Web,Windows Desktop,IE,US
3,bjjt8pjhuk,12/5/2011,20091000000000.0,9/8/2012,FEMALE,42.0,facebook,0,en,direct,direct,untracked,Web,Mac Desktop,Firefox,other
4,87mebub9p4,9/14/2010,20091200000000.0,2/18/2010,-unknown-,41.0,basic,0,en,direct,direct,untracked,Web,Mac Desktop,Chrome,US


## Rename country column before appending users

In [14]:
# Rename 'country' to 'country_destination' so the column name lines up with
# the users frame before the two are stacked. Rebinding the result (instead of
# inplace=True) avoids mutating the frame object that earlier cells displayed.
airbnb = airbnb.rename(columns={'country': 'country_destination'})

## Append the users data to the airbnb DataFrame

In [15]:
# Stack the merged test-user frame on top of the users frame; ignore_index
# discards both original indexes and numbers the combined rows from 0.
airbnb_all = pd.concat(
    objs=[airbnb, airbnb_users],
    ignore_index=True,
)
airbnb_all.head(n=5)

Unnamed: 0,id,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser,country_destination
0,5uwns89zht,7/1/2014,20140700000000.0,,FEMALE,35.0,facebook,0,en,direct,direct,untracked,Moweb,iPhone,Mobile Safari,NDF
1,jtl0dijy2j,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,untracked,Moweb,iPhone,Mobile Safari,NDF
2,xx0ulgorjt,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,linked,Web,Windows Desktop,Chrome,NDF
3,6c6puo6ix0,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,linked,Web,Windows Desktop,IE,NDF
4,czqhjk3yfe,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,untracked,Web,Mac Desktop,Safari,NDF


###### fin.