In [1]:
import pandas as pd


In [2]:
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])

In [3]:
data['a']

0.25

In [4]:
population_dict = {'California': 31, 'Texas': 24, 'New York': 1, 'Florida': 22, 'Illinois': 1000}

population = pd.Series(population_dict)
population

California      31
Florida         22
Illinois      1000
New York         1
Texas           24
dtype: int64

In [5]:
pd.Series({2: 'a', 1:'b', 3:'c'}, index=[3,2])

3    c
2    a
dtype: object

In [6]:
area_dict = {'California': 423967, 'Texas': 695662, 'New York': 141297,
             'Florida': 170312, 'Illinois': 149995}
population_dict = {'California': 38332521,
                   'Texas': 26448193,
                   'New York': 19651127,
                   'Florida': 19552860,
                   'Illinois': 12882135}
area = pd.Series(area_dict)
population = pd.Series(population_dict)

In [7]:
states = pd.DataFrame({'population': population_dict, 'area': area_dict})

In [8]:
states

Unnamed: 0,area,population
California,423967,38332521
Florida,170312,19552860
Illinois,149995,12882135
New York,141297,19651127
Texas,695662,26448193


In [9]:
states.columns

Index(['area', 'population'], dtype='object')

In [10]:
ind = pd.Index([2,3,4,7,11])
ind

Int64Index([2, 3, 4, 7, 11], dtype='int64')

In [11]:
ind[1]

3

In [12]:
ind[::2]

Int64Index([2, 4, 11], dtype='int64')

In [13]:
print(ind.size, ind.shape, ind.ndim, ind.dtype)

5 (5,) 1 int64


In [14]:
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [15]:
data['b']

0.5

In [16]:
'a' in data

True

In [17]:
data.keys()

Index(['a', 'b', 'c', 'd'], dtype='object')

In [18]:
list(data.items())

[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]

In [19]:
data['e'] = 1.25

In [20]:
data['a':'c']

a    0.25
b    0.50
c    0.75
dtype: float64

In [21]:
data[(data > 0.3) & (data < 0.8)] 

b    0.50
c    0.75
dtype: float64

In [22]:
data.a

0.25

In [23]:
data[['a', 'w']]

a    0.25
w     NaN
dtype: float64

In [27]:
data.iloc[1:3]

b    0.50
c    0.75
dtype: float64

In [28]:
area = pd.Series({'California': 423967, 'Texas': 695662,
                  'New York': 141297, 'Florida': 170312,
                  'Illinois': 149995})
pop = pd.Series({'California': 38332521, 'Texas': 26448193,
                 'New York': 19651127, 'Florida': 19552860,
                 'Illinois': 12882135})

data = pd.DataFrame({'area': area, 'pop': pop})

In [29]:
data

Unnamed: 0,area,pop
California,423967,38332521
Florida,170312,19552860
Illinois,149995,12882135
New York,141297,19651127
Texas,695662,26448193


In [30]:
data['area']

California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
Name: area, dtype: int64

In [31]:
data.keys()

Index(['area', 'pop'], dtype='object')

In [36]:
data.area

California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
Name: area, dtype: int64

In [37]:
data['density'] = data['pop'] / data['area']

In [41]:
data.values

array([[  4.23967000e+05,   3.83325210e+07,   9.04139261e+01],
       [  1.70312000e+05,   1.95528600e+07,   1.14806121e+02],
       [  1.49995000e+05,   1.28821350e+07,   8.58837628e+01],
       [  1.41297000e+05,   1.96511270e+07,   1.39076746e+02],
       [  6.95662000e+05,   2.64481930e+07,   3.80187404e+01]])

In [42]:
data.T

Unnamed: 0,California,Florida,Illinois,New York,Texas
area,423967.0,170312.0,149995.0,141297.0,695662.0
pop,38332520.0,19552860.0,12882140.0,19651130.0,26448190.0
density,90.41393,114.8061,85.88376,139.0767,38.01874


In [43]:
data.values[0]

array([  4.23967000e+05,   3.83325210e+07,   9.04139261e+01])

In [48]:
data.iloc[:3, :2] = 90
data

Unnamed: 0,area,pop,density
California,90,90,90.413926
Florida,90,90,114.806121
Illinois,90,90,85.883763
New York,141297,19651127,139.076746
Texas,695662,26448193,38.01874


In [45]:
data.loc[:'Illinois', :'pop']

Unnamed: 0,area,pop
California,423967,38332521
Florida,170312,19552860
Illinois,149995,12882135


In [46]:
data.ix[:3, :'pop']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  """Entry point for launching an IPython kernel.


Unnamed: 0,area,pop
California,423967,38332521
Florida,170312,19552860
Illinois,149995,12882135


In [49]:
area = pd.Series({'Alaska': 1723337, 'Texas': 695662,
                  'California': 423967}, name='area')
population = pd.Series({'California': 38332521, 'Texas': 26448193,
                        'New York': 19651127}, name='population')

In [50]:
population /area

Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64

In [52]:
population.index | area.index

Index(['Alaska', 'California', 'New York', 'Texas'], dtype='object')

In [53]:
A = pd.Series([2, 4, 6], index=[0, 1, 2])
B = pd.Series([1, 3, 5], index=[1, 2, 3])
A + B

0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64

In [54]:
A.add(B, fill_value=0)

0    2.0
1    5.0
2    9.0
3    5.0
dtype: float64