In [1]:
import os
import pandas as pd

# Load the users table; the first CSV column becomes the row labels.
users = pd.read_csv(
    os.path.join('data', 'users.csv'),
    index_col=0,
)
# Sanity check: how many rows were loaded?
len(users)

475

In [2]:
# Compare every value in the column at once; the result is a new boolean `Series`.
#   `.lt(1)` is the method spelling of `< 1`. The result is named so it can be reused later.
no_referrals_index = users['referral_count'].lt(1)
# The boolean `Series` has one entry for every row of the `DataFrame`;
#  each value is the outcome of that row's comparison.
no_referrals_index.head()

aaron            False
acook            False
adam.saunders    False
adrian           False
adrian.blair     False
Name: referral_count, dtype: bool

In [3]:
# Keep only the rows of `users` where `no_referrals_index` is True
users.loc[no_referrals_index].head()

Unnamed: 0,first_name,last_name,email,email_verified,signup_date,referral_count,balance
alan9443,Alan,Pope,pope@hotmail.com,True,2018-04-17,0,56.09
andrew.alvarez,Andrew,Alvarez,aalvarez@hotmail.com,False,2018-08-01,0,81.66
boyer7005,Sara,Boyer,boyer8636@gmail.com,True,2018-07-31,0,91.41
brandon.gilbert,Brandon,Gilbert,brandon.gilbert@hotmail.com,True,2018-04-28,0,10.17
brooke2027,Brooke,,brooke6938@gmail.com,False,2018-05-23,0,7.22


In [4]:
# `~` is the bitwise NOT operator: on a boolean `Series` it flips every
# False to True and every True to False.
# Careful, double negative here: NOT "no referrals". We don't need no education.
# Parenthesized so the inversion visibly applies to the mask itself.
(~no_referrals_index).head()

aaron            True
acook            True
adam.saunders    True
adrian           True
adrian.blair     True
Name: referral_count, dtype: bool

In [5]:
# Invert the mask to select the rows where `referral_count < 1` was False,
# i.e. the users who do have referrals
users.loc[~no_referrals_index].head()

Unnamed: 0,first_name,last_name,email,email_verified,signup_date,referral_count,balance
aaron,Aaron,Davis,aaron6348@gmail.com,True,2018-08-31,6,18.14
acook,Anthony,Cook,cook@gmail.com,True,2018-05-12,2,55.45
adam.saunders,Adam,Saunders,adam@gmail.com,False,2018-05-29,3,72.12
adrian,Adrian,Fang,adrian.fang@teamtreehouse.com,True,2018-04-28,3,30.01
adrian.blair,Adrian,Blair,adrian9335@gmail.com,True,2018-06-16,7,25.85


In [6]:
# `.loc` takes a row selector and a column selector together:
# rows with no referrals, and only these columns, in this order.
users.loc[
    no_referrals_index,     # rows: boolean mask
    ['balance', 'email'],   # columns: ordered list of labels
].head()

Unnamed: 0,balance,email
alan9443,56.09,pope@hotmail.com
andrew.alvarez,81.66,aalvarez@hotmail.com
boyer7005,91.41,boyer8636@gmail.com
brandon.gilbert,10.17,brandon.gilbert@hotmail.com
brooke2027,7.22,brooke6938@gmail.com


In [7]:
# The comparison can also be written inline, without first storing the mask in a variable
users.loc[users['referral_count'] == 0].head()

Unnamed: 0,first_name,last_name,email,email_verified,signup_date,referral_count,balance
alan9443,Alan,Pope,pope@hotmail.com,True,2018-04-17,0,56.09
andrew.alvarez,Andrew,Alvarez,aalvarez@hotmail.com,False,2018-08-01,0,81.66
boyer7005,Sara,Boyer,boyer8636@gmail.com,True,2018-07-31,0,91.41
brandon.gilbert,Brandon,Gilbert,brandon.gilbert@hotmail.com,True,2018-04-28,0,10.17
brooke2027,Brooke,,brooke6938@gmail.com,False,2018-05-23,0,7.22


In [8]:
# As with NumPy ndarrays, two boolean Series can be combined element-wise with `&`.
# Each comparison must be parenthesized because `&` binds tighter than `==`.
# Select all users who haven't made a referral AND whose email has been verified.
# NOTE(review): if `email_verified` is a plain bool column the `== True` is redundant - confirm dtype.
users.loc[
    (users['referral_count'] == 0)
    & (users['email_verified'] == True)
].head()

Unnamed: 0,first_name,last_name,email,email_verified,signup_date,referral_count,balance
alan9443,Alan,Pope,pope@hotmail.com,True,2018-04-17,0,56.09
boyer7005,Sara,Boyer,boyer8636@gmail.com,True,2018-07-31,0,91.41
brandon.gilbert,Brandon,Gilbert,brandon.gilbert@hotmail.com,True,2018-04-28,0,10.17
bryant,Darlene,Bryant,dbryant@yahoo.com,True,2018-07-19,0,36.91
calvin.perez,Calvin,Perez,cperez@gmail.com,True,2018-02-17,0,13.01


## Challenge 1: Top Referrers

In [10]:
# Setup
import os
import pandas as pd

from tests.helpers import check

# Cap how many rows pandas prints when a DataFrame is displayed
pd.set_option('display.max_rows', 10)
# Load the users table; the first CSV column becomes the row labels.
users = pd.read_csv(
    os.path.join('data', 'users.csv'),
    index_col=0,
)
# Sanity check: how many rows were loaded?
len(users)

475

In [12]:
# Users with more than 4 referrals
users.loc[users['referral_count'] > 4].head()

Unnamed: 0,first_name,last_name,email,email_verified,signup_date,referral_count,balance
aaron,Aaron,Davis,aaron6348@gmail.com,True,2018-08-31,6,18.14
adrian.blair,Adrian,Blair,adrian9335@gmail.com,True,2018-06-16,7,25.85
alvarado,Denise,Alvarado,alvarado@hotmail.com,True,2018-09-07,6,26.72
alvarez,John,Alvarez,john4346@hotmail.com,True,2018-09-18,6,49.62
amiller,Anne,Miller,miller@hotmail.com,False,2018-06-02,5,86.28


In [None]:
## CHALLENGE - Find the top referrers ##
# Select users that have a referral count greater than or equal to 5 AND have verified emails.
# Each comparison is parenthesized because `&` binds tighter than `>=` / `==`.
top_referrers_index = (users['referral_count'] >= 5) & (users['email_verified'] == True)

# Return the dataframe (filtered view only; `users` itself is left unmodified)
users[top_referrers_index]