In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
data = pd.Series([1,2,3,48,9,1,2])

In [4]:
data

0     1
1     2
2     3
3    48
4     9
5     1
6     2
dtype: int64

##### series returns an indexed series with two columns values and index

In [5]:
data.values

array([ 1,  2,  3, 48,  9,  1,  2])

In [7]:
data.value_counts?

In [8]:
data.value_counts

<bound method Series.value_counts of 0     1
1     2
2     3
3    48
4     9
5     1
6     2
dtype: int64>

##### returns the count of values on that particular index

In [9]:
data[1:3]

1    2
2    3
dtype: int64

In [10]:
data = pd.Series([4,2,6,1,2,6],index=['a','b','c','w','h','k'])

In [11]:
data

a    4
b    2
c    6
w    1
h    2
k    6
dtype: int64

###### indexed series need explicit index declaration

In [13]:
data['b']

2

## DICTIONARY

In [14]:
population_dict = {'California': 38332521,
                           'Texas': 26448193,
                           'New York': 19651127,
                           'Florida': 19552860,
                           'Illinois': 12882135}

In [15]:
population_dict['Texas']

26448193

In [16]:
pd.Series(5, index=[100,200,300])

100    5
200    5
300    5
dtype: int64

In [17]:
pd.Series({2:'a', 1:'b', 3:'c'}, index=[3, 2])

3    c
2    a
dtype: object

### The Pandas DataFrame Object

In [19]:
population_dict = {'California': 38332521,
                           'Texas': 26448193,
                           'New York': 19651127,
                           'Florida': 19552860,
                           'Illinois': 12882135}
population = pd.Series(population_dict)

In [21]:
area_dict = {'California': 423967, 'Texas': 695662,
             'New York': 141297,
             'Florida': 170312, 'Illinois': 149995}

In [22]:
area = pd.Series(area_dict)
area

California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
dtype: int64

In [23]:
states = pd.DataFrame({'population': population,
                               'area': area})

In [24]:
states

Unnamed: 0,area,population
California,423967,38332521
Florida,170312,19552860
Illinois,149995,12882135
New York,141297,19651127
Texas,695662,26448193


In [25]:
states.index

Index([u'California', u'Florida', u'Illinois', u'New York', u'Texas'], dtype='object')

In [26]:
states['area']

California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
Name: area, dtype: int64

In [27]:
pd.DataFrame(population, columns=['population'])

Unnamed: 0,population
California,38332521
Florida,19552860
Illinois,12882135
New York,19651127
Texas,26448193


#### FROM a list of dicts using FOR during dataframe construction

In [28]:
data = [{'a':i, 'b':2*i}
           for i in range(3)]

pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


#### FROM a TWO DIMENSIONAL Numpy array create DF with column and index

In [29]:
pd.DataFrame(np.random.rand(3,2),
            columns=['foo','bar'],
            index=['a','b','c'])

Unnamed: 0,foo,bar
a,0.530344,0.90636
b,0.113786,0.613485
c,0.433685,0.563832


#### STRUCTURED ARRAY

In [30]:
A = np.zeros(3, dtype=[('A','i8'),('B','f8')])

In [31]:
A

array([(0, 0.), (0, 0.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])

### INDEX AS ORDERED SET

In [32]:
indA = pd.Index([1, 3, 5, 7, 9])
indB = pd.Index([2, 3, 5, 7, 11])

##### Intersection of indA and indB

In [33]:
indA & indB

Int64Index([3, 5, 7], dtype='int64')

Union of indA and IndB

In [34]:
indA | indB

Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')

##### symmetric difference

In [35]:
indA ^ indB

Int64Index([1, 2, 9, 11], dtype='int64')

### SERIES AS DICTIONARY

In [36]:
import pandas as pd
data = pd.Series([0.25, 0.5, 0.75, 1.0],
index=['a', 'b', 'c', 'd'])

In [37]:
data['a']

0.25

##### boolean opperation

In [38]:
'a' in data

True

KEYS

In [39]:
data.keys()

Index([u'a', u'b', u'c', u'd'], dtype='object')

In [40]:
list(data.items())

[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]

In [42]:
data['e']=1.25

In [43]:
data

a    0.25
b    0.50
c    0.75
d    1.00
e    1.25
dtype: float64

## SERIES AS ONE-DIMENSIONAL ARRAY

#### Slicing by index

In [44]:
data['a':'c']

a    0.25
b    0.50
c    0.75
dtype: float64

In [45]:
data[0:2]

a    0.25
b    0.50
dtype: float64

#### MASKING

In [46]:
data[(data > 0.3) & (data < 0.8)]

b    0.50
c    0.75
dtype: float64

#### FANCY INDEXING

In [47]:
data[['a','e']]

a    0.25
e    1.25
dtype: float64