In [1]:
# Setup
from datetime import datetime
import os

import numpy as np
import pandas as pd

# Load both tables; index_col=0 makes the first column of each file the row index
users = pd.read_csv(
    os.path.join('data', 'users.csv'),
    index_col=0,
)
transactions = pd.read_csv(
    os.path.join('data', 'transactions.csv'),
    index_col=0,
)
# Quick sanity check: the (rows, columns) shape of each frame
users.shape, transactions.shape

((475, 7), (998, 4))

In [2]:
# Scenario: Adrian Fang calls to say his balance is wrong -- it shows 30.01 but should be $35.
# The next cell updates it with chained indexing (don't do that, it is bad practice).
# First, make sure there is only one Adrian Fang.
users[users.first_name.eq("Adrian") & users.last_name.eq("Fang")]

Unnamed: 0,first_name,last_name,email,email_verified,signup_date,referral_count,balance
adrian,Adrian,Fang,adrian.fang@teamtreehouse.com,True,2018-04-28,3,30.01


In [3]:
# Anti-pattern, kept on purpose as a demonstration: chained indexing followed by assignment.
# The boolean filter is evaluated first and ['balance'] = ... is then applied to its result;
# pandas emits the warning shown below ("A value is trying to be set on a copy of a slice"),
# so the new value is not reliably written to `users`. Use .loc[rows, column] = value instead.
users[(users.first_name == "Adrian") & (users.last_name == "Fang")]['balance'] = 35.00

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [4]:
# Fix: give .loc the row selector and the column label in a single indexing call,
# so the assignment is made on `users` itself
users.loc[users.first_name.eq("Adrian") & users.last_name.eq("Fang"), 'balance'] = 35.00
# Display Adrian's row to confirm the new balance was assigned
users.loc['adrian']

first_name                               Adrian
last_name                                  Fang
email             adrian.fang@teamtreehouse.com
email_verified                             True
signup_date                          2018-04-28
referral_count                                3
balance                                      35
Name: adrian, dtype: object

In [5]:
# `.at` addresses a single cell by row label and column label,
# so it can also be used to quickly set one scalar value
users.at['adrian', 'balance'] = 35.00

In [6]:
# Now we need to record the transaction that occurred; first look at the existing rows
transactions.head()

Unnamed: 0,sender,receiver,amount,sent_date
0,stein,smoyer,49.03,2018-01-24
1,holden4580,joshua.henry,34.64,2018-02-06
2,rose.eaton,emily.lewis,62.67,2018-02-15
3,lmoore,kallen,1.94,2018-03-05
4,scott3928,lmoore,27.82,2018-03-10


In [7]:
# Build the new transaction record as a dict keyed by the transactions column names.
# - sender is NaN because this credit does not come from another user.
# - sent_date is stored as an ISO 'YYYY-MM-DD' string: transactions.csv is read without
#   parse_dates, so the existing sent_date values are strings, and putting a datetime.date
#   object next to them would leave the column with mixed types.
record = dict(
    sender=np.nan,
    receiver='adrian',
    amount=4.99,
    sent_date=datetime.now().date().isoformat(),
)

In [8]:
# Add the record as a new row WITHOUT changing the original frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so use pd.concat:
# wrap the dict in a one-row DataFrame and concatenate it onto `transactions`.
# Remember this returns a new DataFrame -- `transactions` itself is left untouched.
# ignore_index=True renumbers the result 0..n-1, so the new row gets the next integer label.
# pd.concat is also the efficient choice when adding multiple rows at once.
pd.concat([transactions, pd.DataFrame([record])], ignore_index=True).tail()

Unnamed: 0,sender,receiver,amount,sent_date
994,king3246,john,25.37,2018-09-25
995,shernandez,kristen1581,75.77,2018-09-25
996,leah6255,jholloway,63.62,2018-09-25
997,pamela,michelle4225,2.54,2018-09-25
998,,adrian,4.99,2019-08-18


In [9]:
# Assigning to an index label that does not exist yet enlarges the DataFrame with a new row.
# .loc needs an explicit label for that row, so take the largest label currently in use
# and add one (the index labels shown in the outputs above are integers).
# Note: every run of this cell adds one more row, because next_key is recomputed from the current index.
next_key = transactions.index.max() + 1
transactions.loc[next_key] = record 
# Make sure it got added
transactions.tail()

Unnamed: 0,sender,receiver,amount,sent_date
994,king3246,john,25.37,2018-09-25
995,shernandez,kristen1581,75.77,2018-09-25
996,leah6255,jholloway,63.62,2018-09-25
997,pamela,michelle4225,2.54,2018-09-25
998,,adrian,4.99,2019-08-18


In [10]:
# Columns can be added in a similar way: writing to a column label that does not exist creates it
latest_id = transactions.index.max()
# Add a new column named notes by setting it on the latest row only;
# in the output below the other rows show no value for it
transactions.at[latest_id, 'notes'] = 'Adrian called customer support to report billing error.'
transactions.tail()

Unnamed: 0,sender,receiver,amount,sent_date,notes
994,king3246,john,25.37,2018-09-25,
995,shernandez,kristen1581,75.77,2018-09-25,
996,leah6255,jholloway,63.62,2018-09-25,
997,pamela,michelle4225,2.54,2018-09-25,
998,,adrian,4.99,2019-08-18,Adrian called customer support to report billi...


In [11]:
# A column can also be created from an expression evaluated over an existing column.
# Add a new boolean column called large. This is a bad name and use of a column ;)
transactions['large'] = transactions['amount'].gt(70)

In [12]:
# Check the new boolean column on the first few rows
transactions.head()

Unnamed: 0,sender,receiver,amount,sent_date,notes,large
0,stein,smoyer,49.03,2018-01-24,,False
1,holden4580,joshua.henry,34.64,2018-02-06,,False
2,rose.eaton,emily.lewis,62.67,2018-02-15,,False
3,lmoore,kallen,1.94,2018-03-05,,False
4,scott3928,lmoore,27.82,2018-03-10,,False


In [13]:
# Columns can be renamed too. By default rename() returns a copy;
# passing inplace=True modifies `transactions` itself instead.
transactions.rename(columns={'large': 'big_sender'}, inplace=True)
transactions.head()

Unnamed: 0,sender,receiver,amount,sent_date,notes,big_sender
0,stein,smoyer,49.03,2018-01-24,,False
1,holden4580,joshua.henry,34.64,2018-02-06,,False
2,rose.eaton,emily.lewis,62.67,2018-02-15,,False
3,lmoore,kallen,1.94,2018-03-05,,False
4,scott3928,lmoore,27.82,2018-03-10,,False


In [14]:
# Columns can also be deleted with drop(columns=[...]); inplace=True applies it to `transactions` itself.
# (The `del` statement, e.g. `del transactions['notes']`, works too.)
transactions.drop(columns=['notes'], inplace=True)
transactions.head()

Unnamed: 0,sender,receiver,amount,sent_date,big_sender
0,stein,smoyer,49.03,2018-01-24,False
1,holden4580,joshua.henry,34.64,2018-02-06,False
2,rose.eaton,emily.lewis,62.67,2018-02-15,False
3,lmoore,kallen,1.94,2018-03-05,False
4,scott3928,lmoore,27.82,2018-03-10,False


In [15]:
# A column can also be dropped using the axis parameter:
# pass the labels positionally and state that they are column labels
transactions.drop(['big_sender'], axis='columns', inplace=True)
transactions.head()

Unnamed: 0,sender,receiver,amount,sent_date
0,stein,smoyer,49.03,2018-01-24
1,holden4580,joshua.henry,34.64,2018-02-06
2,rose.eaton,emily.lewis,62.67,2018-02-15
3,lmoore,kallen,1.94,2018-03-05
4,scott3928,lmoore,27.82,2018-03-10


In [16]:
# Rows can be dropped by index label as well. Here the row with the largest label is removed,
# which in this run is the record that was added earlier (label 998 in the outputs above).
last_key = transactions.index.max()
transactions.drop(index=[last_key], inplace=True)
transactions.tail()

Unnamed: 0,sender,receiver,amount,sent_date
993,coleman,sarah.evans,36.29,2018-09-25
994,king3246,john,25.37,2018-09-25
995,shernandez,kristen1581,75.77,2018-09-25
996,leah6255,jholloway,63.62,2018-09-25
997,pamela,michelle4225,2.54,2018-09-25


## Optional Challenge 2

In [17]:
# Setup
import os
import pandas as pd

from tests.helpers import check

# Show at most 10 rows when a frame is displayed
pd.options.display.max_rows = 10
# Reload users from disk; this replaces the `users` frame that earlier cells edited
users = pd.read_csv(
    os.path.join('data', 'users.csv'),
    index_col=0,
)
# Quick sanity check: number of rows
users.shape[0]

475

In [18]:
# Preview the first rows of the freshly loaded users table
users.head()

Unnamed: 0,first_name,last_name,email,email_verified,signup_date,referral_count,balance
aaron,Aaron,Davis,aaron6348@gmail.com,True,2018-08-31,6,18.14
acook,Anthony,Cook,cook@gmail.com,True,2018-05-12,2,55.45
adam.saunders,Adam,Saunders,adam@gmail.com,False,2018-05-29,3,72.12
adrian,Adrian,Fang,adrian.fang@teamtreehouse.com,True,2018-04-28,3,30.01
adrian.blair,Adrian,Blair,adrian9335@gmail.com,True,2018-06-16,7,25.85


In [19]:
# Look the user up by email address; the output shows that her last_name is missing
users[users.email.eq('kimberly@yahoo.com')]

Unnamed: 0,first_name,last_name,email,email_verified,signup_date,referral_count,balance
kimberly,Kimberly,,kimberly@yahoo.com,False,2018-01-06,5,54.73


In [20]:
# Fill in the missing last name with a single .loc[rows, column] assignment
users.loc[users.email.eq('kimberly@yahoo.com'), 'last_name'] = 'Deal'
# Display the row again to confirm the update
users[users.email.eq('kimberly@yahoo.com')]

Unnamed: 0,first_name,last_name,email,email_verified,signup_date,referral_count,balance
kimberly,Kimberly,Deal,kimberly@yahoo.com,False,2018-01-06,5,54.73


In [21]:
# Select the row whose index label is 'jeffrey' using a boolean mask on the index
users[users.index == 'jeffrey']

Unnamed: 0,first_name,last_name,email,email_verified,signup_date,referral_count,balance
jeffrey,Jeffrey,Stewart,stewart7222@hotmail.com,True,2018-01-02,0,40.58


In [22]:
# Change the index label 'jeffrey' to 'jefrey' (single "f"), modifying `users` in place.
# NOTE(review): presumably the shorter spelling is the requested username, not a typo -- confirm.
users.rename(index={'jeffrey': 'jefrey'}, inplace=True)
# The old label should no longer match any row (the output below is empty)
users[users.index == 'jeffrey']

Unnamed: 0,first_name,last_name,email,email_verified,signup_date,referral_count,balance


In [23]:
# Confirm the row is now found under the new index label 'jefrey'
users[users.index == 'jefrey']

Unnamed: 0,first_name,last_name,email,email_verified,signup_date,referral_count,balance
jefrey,Jeffrey,Stewart,stewart7222@hotmail.com,True,2018-01-02,0,40.58
