In [98]:
# Re-import edited modules automatically before each cell executes.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [99]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sns.set()

## Pandas Series
A pandas `Series` is a one-dimensional array of indexed data. It can be created from a list or an array.

In [100]:
# Wrap a plain Python list in a Series; since no index is supplied, pandas
# assigns the default integer labels 0..n-1.
data = pd.Series(data=[0.25, 0.50, 0.75, 1.00])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [101]:
# The values of the pd.Series as a NumPy array. to_numpy() is the accessor the
# pandas documentation recommends over the older .values attribute.
data.to_numpy()

array([0.25, 0.5 , 0.75, 1.  ])

In [102]:
# The pd.Series index: the labels attached to each value
data.index

RangeIndex(start=0, stop=4, step=1)

In [103]:
# Getting a value of a pd.Series through its index label
data[1]

0.5

In [104]:
# Positional slice (end excluded). .iloc states the positional intent
# explicitly, so the cell cannot be misread as a label-based slice on the
# integer index.
data.iloc[1:3]

1    0.50
2    0.75
dtype: float64

In [105]:
# A pd.Series object can carry an explicitly defined index; here string
# labels are supplied in place of the default integers.
data = pd.Series(
    data=[0.25, 0.50, 0.75, 1.00],
    index=list('abcd'),
)
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [106]:
# Dictionary-style lookup of a single value by its label
data['b']

0.5

In [107]:
# Index labels do not have to be contiguous or sorted; arbitrary integers
# are accepted as labels.
data = pd.Series(
    data=[0.25, 0.50, 0.75, 1.00],
    index=[2, 5, 3, 7],
)
data

2    0.25
5    0.50
3    0.75
7    1.00
dtype: float64

In [108]:
# Look up the value stored under the *label* 3 (not the fourth position).
# With a non-default integer index plain [] is easy to misread, so .loc is
# used to make the label-based intent explicit.
data.loc[3]

0.75

In [128]:
# A pd.Series object can be built directly from a Python dict: the keys
# become the index labels and the values become the data.
# NOTE(review): units are not stated here (presumably millions of people) -- confirm.
population_dict = {
    'california': 38.33,
    'texas': 26.44,
    'new york': 19.65,
    'florida': 19.55,
    'illinois': 12.88,
}
population = pd.Series(data=population_dict)
population

california    38.33
texas         26.44
new york      19.65
florida       19.55
illinois      12.88
dtype: float64

In [110]:
# Dictionary-style lookup: fetch one state's value by its label
population['california']

38.33

In [111]:
# Label-based slice: unlike a positional slice, the end label is included
population['california':'illinois']

california    38.33
texas         26.44
new york      19.65
florida       19.55
illinois      12.88
dtype: float64

In [112]:
# Integer data with no index given: labels default to 0..n-1
pd.Series([2, 4, 6])

0    2
1    4
2    6
dtype: int64

In [113]:
# pd.Series object from a scalar value: the scalar is repeated once for
# every label in the supplied index
pd.Series(5, index=range(100, 400, 100))

100    5
200    5
300    5
dtype: int64

In [114]:
# From a dict: the keys become the index labels, the values become the data
pd.Series({2:'a', 1:'b', 3:'c'})

2    a
1    b
3    c
dtype: object

In [115]:
# An explicit index picks which dict keys are kept, and in what order
pd.Series({2:'a', 1:'b', 3:'c'}, index=[3, 2])

3    c
2    a
dtype: object

## Pandas DataFrame

In [116]:
# Second per-state Series, keyed by the same state names as `population`.
# NOTE(review): units are not stated in the notebook -- confirm before reuse.
area_dict = {
    'california': 42.39,
    'texas': 69.56,
    'new york': 14.12,
    'florida': 17.02,
    'illinois': 14.99,
}
area = pd.Series(data=area_dict)
area

california    42.39
texas         69.56
new york      14.12
florida       17.02
illinois      14.99
dtype: float64

In [117]:
# Assemble a two-column DataFrame: each Series becomes one column, and rows
# are matched up on the index labels the Series carry.
states = pd.DataFrame(
    data={
        'population': population,
        'area': area,
    }
)
states

Unnamed: 0,population,area
california,38.33,42.39
texas,26.44,69.56
new york,19.65,14.12
florida,19.55,17.02
illinois,12.88,14.99


In [118]:
# Row labels of the DataFrame
states.index

Index(['california', 'texas', 'new york', 'florida', 'illinois'], dtype='object')

In [119]:
# The DataFrame's data as a 2-D NumPy array (one row per index label, one
# column per DataFrame column). to_numpy() is the accessor the pandas
# documentation recommends over the older .values attribute.
states.to_numpy()

array([[38.33, 42.39],
       [26.44, 69.56],
       [19.65, 14.12],
       [19.55, 17.02],
       [12.88, 14.99]])

In [120]:
# Dictionary-style column access: returns the 'area' column as a Series
states['area']

california    42.39
texas         69.56
new york      14.12
florida       17.02
illinois      14.99
Name: area, dtype: float64

In [121]:
# Dictionary-style column access: returns the 'population' column as a Series
states['population']

california    38.33
texas         26.44
new york      19.65
florida       19.55
illinois      12.88
Name: population, dtype: float64

In [122]:
# Build a single-column DataFrame from one Series. to_frame(name=...) sets the
# column label directly, so the result does not depend on whether the Series
# already carries a name (passing columns= to the DataFrame constructor only
# labels the data as intended while the Series is unnamed).
population.to_frame(name='population')

Unnamed: 0,population
california,38.33
texas,26.44
new york,19.65
florida,19.55
illinois,12.88


In [123]:
# From a list of dicts: each dict is one row; keys missing from a row are
# left empty (NaN) in that row
pd.DataFrame([{'a':1, 'b':2}, {'b':3, 'c':4}])

Unnamed: 0,a,b,c
0,1.0,2,
1,,3,4.0


In [124]:
# From a dict of Series: each key becomes a column name
pd.DataFrame({'population':population, 'area':area})

Unnamed: 0,population,area
california,38.33,42.39
texas,26.44,69.56
new york,19.65,14.12
florida,19.55,17.02
illinois,12.88,14.99


In [125]:
# From a 2-D NumPy array with explicit column and row labels.
# The random values come from a locally seeded generator so that
# Restart Kernel -> Run All reproduces the same table every time (the
# unseeded np.random.rand call produced different numbers on each run).
random_frame = pd.DataFrame(
    np.random.RandomState(seed=0).rand(3, 2),
    columns=['foo', 'bar'],
    index=['a', 'b', 'c'],
)
random_frame

Unnamed: 0,foo,bar
a,0.555953,0.952551
b,0.375739,0.086734
c,0.557539,0.609215
