# Introducing Pandas Objects

## The Pandas Series Object

In [2]:
import pandas as pd
# Build a Series from a plain list; with no index given, the displayed labels are 0..3.
data = pd.Series([0.25, 0.5, 0.75, 1.0])
# End the cell with the bare name (instead of print) so the notebook displays
# the value itself, the same way the other cells in this notebook do.
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64


In [3]:
# `.values` exposes the Series' data as a NumPy array (see the `array([...])` output).
data.values

array([ 0.25,  0.5 ,  0.75,  1.  ])

In [4]:
# Square-bracket access with a single key returns one scalar value (0.5 here).
data[1]

0.5

In [5]:
# Slice notation returns a sub-Series; here it holds the entries labelled 1 and 2.
data[1:3]

1    0.50
2    0.75
dtype: float64

### Series as generalized NumPy array

In [7]:
# Same four values as before, but labelled with the letters 'a'..'d'
# instead of the default integer index.
data = pd.Series(
    [0.25, 0.5, 0.75, 1.0],
    index=list('abcd'),
)
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [8]:
# With a string index, items are looked up by their label.
data['b']

0.5

In [9]:
# Index labels do not have to be contiguous or start at zero.
data = pd.Series(
    [0.25, 0.5, 0.75, 1.0],
    index=[2, 3, 5, 7],
)
data

2    0.25
3    0.50
5    0.75
7    1.00
dtype: float64

In [10]:
# 5 is matched against the explicit index labels [2, 3, 5, 7], not used as a
# position: the Series only has four entries, and the result is the value labelled 5.
data[5]

0.75

### Series as specialized dictionary

In [12]:
# State populations keyed by state name.
# NOTE(review): 'Illionis' looks like a misspelling of 'Illinois'; it is kept
# verbatim here because other cells in this notebook use the same key.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illionis': 12882135,
}
# A Series built from a dict uses the dict keys as its index labels.
population = pd.Series(population_dict)
population

California    38332521
Florida       19552860
Illionis      12882135
New York      19651127
Texas         26448193
dtype: int64

In [13]:
# Dictionary-style lookup: the key is the index label.
population['Texas']

26448193

In [14]:
# Unlike a dict, a Series also supports slicing by label; the end label
# ('Illionis') is included in the result.
# NOTE(review): which rows fall inside the slice depends on the order of the
# index; the output shown has the states in alphabetical order — confirm the
# result still matches when re-running on the pandas version in use.
population['California':'Illionis']

California    38332521
Florida       19552860
Illionis      12882135
dtype: int64

### Constructing Series objects

In [15]:
# With no index given, the labels default to the integer sequence 0, 1, 2.
pd.Series([2,4,6])

0    2
1    4
2    6
dtype: int64

In [16]:
# A scalar is repeated to fill every label of the supplied index.
pd.Series(5,index=[100,200,300])

100    5
200    5
300    5
dtype: int64

In [19]:
# The dict keys become the index labels; the displayed result lists them in
# sorted order (1, 2, 3) rather than in the order written here.
# NOTE(review): that ordering presumably depends on the pandas/Python version —
# confirm on re-run.
pd.Series({2:'a',1:'b',3:'c'})

1    b
2    a
3    c
dtype: object

In [20]:
# An explicit index picks which dict keys are kept and in what order:
# only the entries for 3 and 2 appear in the result, in that order.
pd.Series({2:'a',1:'b',3:'c'},index = [3,2])

3    c
2    a
dtype: object

## The Pandas DataFrame Object

### DataFrame as a generalized NumPy array

In [22]:
# State areas keyed by state name, using the same five keys as the population data.
# NOTE(review): 'Illionis' looks like a misspelling of 'Illinois'; it is kept
# verbatim here because other cells in this notebook use the same key.
area_dict = {
    'California': 423976,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illionis': 149995,
}
# The dict keys become the index labels of the resulting Series.
area = pd.Series(area_dict)
area

California    423976
Florida       170312
Illionis      149995
New York      141297
Texas         695662
dtype: int64

In [24]:
# Combine the two state-indexed Series into a single table with one column
# per Series; each dict key is used as the column name.
stats = pd.DataFrame({
    'population': population,
    'area': area,
})
stats

Unnamed: 0,area,population
California,423976,38332521
Florida,170312,19552860
Illionis,149995,12882135
New York,141297,19651127
Texas,695662,26448193


In [25]:
# Row labels of the DataFrame, held in an Index object.
stats.index

Index(['California', 'Florida', 'Illionis', 'New York', 'Texas'], dtype='object')

In [26]:
# Column labels, also held in an Index object.
stats.columns

Index(['area', 'population'], dtype='object')

### DataFrame as specialized dictionary

In [27]:
# Dictionary-style access by column name returns that column, labelled by state
# and carrying the name 'area'.
stats['area']

California    423976
Florida       170312
Illionis      149995
New York      141297
Texas         695662
Name: area, dtype: int64

### Constructing DataFrame objects

#### From a single Series object

In [28]:
# Build a one-column DataFrame from the population Series; the single column
# is labelled 'population'.
pd.DataFrame(population,columns=['population'])

Unnamed: 0,population
California,38332521
Florida,19552860
Illionis,12882135
New York,19651127
Texas,26448193


#### From a list of dicts

In [29]:
# One dict per row: the keys 'a' and 'b' become the columns, and b is twice a.
data = [dict(a=n, b=2 * n) for n in range(3)]
pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


#### From a dictionary of Series objects

In [31]:
# Each dict key becomes a column name, and the Series' index labels (the
# state names) become the row labels.
pd.DataFrame({'population':population,'area':area})

Unnamed: 0,area,population
California,423976,38332521
Florida,170312,19552860
Illionis,149995,12882135
New York,141297,19651127
Texas,695662,26448193


#### From a two-dimensional NumPy array

In [33]:
import numpy as np

In [34]:
# Draw the 3x2 values from a fixed-seed generator so that Restart & Run All
# always produces the same table; an unseeded np.random.rand call gives
# different numbers on every run. Re-run the cell to refresh the displayed output.
random_frame = pd.DataFrame(
    np.random.RandomState(0).rand(3, 2),
    columns=['foo', 'bar'],
    index=['a', 'b', 'c'],
)
random_frame

Unnamed: 0,foo,bar
a,0.100293,0.154128
b,0.03271,0.507145
c,0.890225,0.449863


#### From a NumPy structured array

In [35]:
# Zero-filled structured array with three records; each record has an
# 8-byte integer field 'A' and an 8-byte float field 'B'.
A = np.zeros(
    shape=3,
    dtype=[('A', 'i8'), ('B', 'f8')],
)
A

array([(0,  0.), (0,  0.), (0,  0.)],
      dtype=[('A', '<i8'), ('B', '<f8')])

In [36]:
# The field names of the structured array ('A' and 'B') become the DataFrame columns.
pd.DataFrame(A)

Unnamed: 0,A,B
0,0,0.0
1,0,0.0
2,0,0.0


## The Pandas Index Object