## Creating a Series

In [1]:
# Just like how NumPy is almost always abbreviated as np...
import numpy as np
#  pandas is usually shortened to pd
import pandas as pd

In [2]:
# Map each user name to that user's balance
test_balance_data = dict(
    pasan=20.00,
    treasure=20.18,
    ashley=1.05,
    craig=42.42,
)

In [4]:
# The Series constructor accepts any dictionary or dictionary-like object.
# Labels are taken from the dictionary's keys; values from its values.
balances = pd.Series(data=test_balance_data)
balances

pasan       20.00
treasure    20.18
ashley       1.05
craig       42.42
dtype: float64

In [7]:
# A Series can also be built from an iterable.
# With no labels given, the default index is a range starting at zero.
unlabeled_balances = pd.Series(data=[20.00, 20.18, 1.05, 42.42])
unlabeled_balances

0    20.00
1    20.18
2     1.05
3    42.42
dtype: float64

In [8]:
# Use the `index` argument to attach labels to the values.
# The index must be the same size as the data.
labeled_balances = pd.Series(
    data=[20.00, 20.18, 1.05, 42.42],
    index=['pasan', 'treasure', 'ashley', 'craig'],
)
labeled_balances

pasan       20.00
treasure    20.18
ashley       1.05
craig       42.42
dtype: float64

In [9]:
# A NumPy array is iterable, so a Series can be built from one as well
ndbalances = np.array([20.00, 20.18, 1.05, 42.42])
pd.Series(data=ndbalances)

0    20.00
1    20.18
2     1.05
3    42.42
dtype: float64

In [10]:
# A scalar value is broadcast to every label given in `index`
pd.Series(20.00, index=["guil", "jay", "james", "ben", "nick"])

guil     20.0
jay      20.0
james    20.0
ben      20.0
nick     20.0
dtype: float64

## Accessing a Series

In [2]:
#install utils package
pip install python-utils

SyntaxError: invalid syntax (<ipython-input-2-19e7eceafd7a>, line 2)

In [1]:
# Setup
import pandas as pd
# utils is a local module located in the same directory as this notebook (it cannot be installed with conda)
from utils import render


# Plain Python dictionary mapping each user to a balance
test_balance_data = dict(
    pasan=20.00,
    treasure=20.18,
    ashley=1.05,
    craig=42.42,
)

# Series whose labels are the dictionary keys
balances = pd.Series(data=test_balance_data)

In [2]:
# Access the first value by position.
# `balances` is labelled with strings, so a bare `balances[0]` relies on the
# deprecated integer-as-position fallback; `.iloc` is explicitly positional.
balances.iloc[0]
# Inspect the type of a single value (only this last expression is displayed)
type(balances.iloc[0])

numpy.float64

In [3]:
# Negative positions work the same way as with a list.
# The last balance -- `.iloc` is used because plain `balances[-1]` on a
# string-labelled Series relies on the deprecated positional fallback.
balances.iloc[-1]

42.42

In [4]:
# Values can also be accessed by label, like a dictionary key
balances['pasan']

20.0

In [5]:
# A Series behaves like a dictionary: `items()` yields (label, value) pairs
message = "The label {} has a value of {}"
for entry in balances.items():
    render(message.format(*entry))

The label pasan has a value of 20.0

The label treasure has a value of 20.18

The label ashley has a value of 1.05

The label craig has a value of 42.42

In [6]:
# A KeyError is raised when you try to access a non-existent label;
# it is caught here so the cell reports the behavior instead of failing.
try:
    balances['kermit']
except KeyError:
    render('Accessing a non-existent key raises a `KeyError`.')

Accessing a non-existent key raises a `KeyError`.

In [7]:
# `get` returns None for a label that is not present instead of raising
if balances.get('kermit') is None:
    render('Use `get` to safely access keys. `None` is returned if key not present.')

Use `get` to safely access keys. `None` is returned if key not present.

In [8]:
# `in` / `not in` checks whether a label exists in the Series
if 'kermit' not in balances:
    render('Use `in` to test the existence of a label.')

Use `in` to test the existence of a label.

In [9]:
# The value of a label is also accessible through dot (attribute) notation.
# NOTE(review): presumably this only works for labels that are valid Python
# identifiers and do not clash with existing Series attributes -- confirm.
balances.ashley

1.05

In [10]:
# A more explicit way of specifying how to look up a value:
# `loc` looks values up by label
balances.loc['pasan']

20.0

In [11]:
# `iloc` looks values up by position (zero-based)
# Get the first value
balances.iloc[0]

20.0

In [12]:
# Positional slicing works too, and the end position is exclusive:
# includes the values from position zero
# up until **and not** including position 3
balances.iloc[0:3]

pasan       20.00
treasure    20.18
ashley       1.05
dtype: float64

In [13]:
# Slicing by label is inclusive on both ends:
# includes the values starting at 'pasan'
# up until **and** including 'ashley'
balances.loc['pasan':'ashley']

pasan       20.00
treasure    20.18
ashley       1.05
dtype: float64

In [14]:
import pandas as pd

# Current balance for each user
test_balance_data = {'pasan': 20.00, 'treasure': 20.18, 'ashley': 1.05, 'craig': 42.42}

# Amount each user is depositing
test_deposit_data = {'pasan': 20, 'treasure': 10, 'ashley': 100, 'craig': 55}

# Both Series share the same labels (the user names)
balances = pd.Series(data=test_balance_data)
deposits = pd.Series(data=test_deposit_data)

In [15]:
# Series arithmetic lines values up by label: each value in balances is
# incremented by the value in deposits that has the same label.
# The sum is rebuilt from `test_balance_data` (rather than `balances += deposits`)
# so the cell is idempotent -- re-running it does not apply the deposits twice.
balances = pd.Series(test_balance_data) + deposits
balances

pasan        40.00
treasure     30.18
ashley      101.05
craig        97.42
dtype: float64

In [16]:
# 5 is broadcast and added to each and every value. This returns a new Series.
balances + 5

pasan        45.00
treasure     35.18
ashley      106.05
craig       102.42
dtype: float64

In [17]:
# Every listed label gets the same scalar value of 1
coupons = pd.Series(data=1, index=['craig', 'ashley', 'james'])
coupons

craig     1
ashley    1
james     1
dtype: int64

In [18]:
# Returns a new Series
# Labels that exist in only one of the two Series (balances or coupons) come out as NaN (Not a Number); this is a problem
balances + coupons

ashley      102.05
craig        98.42
james          NaN
pasan          NaN
treasure       NaN
dtype: float64

In [19]:
# Returns a new Series
# Use the `add` method with the `fill_value` keyword so that a label missing from either Series is treated as zero
balances.add(coupons, fill_value=0)

ashley      102.05
craig        98.42
james         1.00
pasan        40.00
treasure     30.18
dtype: float64

In [20]:
# Values are matched by label before adding, so the sums line up correctly
# even though the two dictionaries list their labels in a different order.
totals = dict(mario=135, peach=149, yoshi=122)
final = dict(peach=45, mario=63, yoshi=77)

total_laps = pd.Series(data=totals)
final_lap = pd.Series(data=final)
total_laps + final_lap

mario    198
peach    194
yoshi    199
dtype: int64