# Getting Started with simple pandas

Here I reproduce some of the operations that appear in *Python for Data Analysis*.

I hope this helps you get familiar with simple pandas.

Also, since simple pandas has no good visualization of its own right now, I have converted each result to a pandas object for display.

In [1]:
import spandas as spd
import pandas as pd
import numpy as np

## 1 Series

In [2]:
obj = spd.Series([4, 7, -5, 3])
pd.Series(obj.values, obj.index)

0    4
1    7
2   -5
3    3
dtype: int32

In [3]:
obj.values

array([ 4,  7, -5,  3])

In [4]:
obj.index

array([0, 1, 2, 3])

In [5]:
obj2 = spd.Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])

In [6]:
tmp = obj2[obj2.index == 'a']
pd.Series(tmp.values, tmp.index)

a   -5
dtype: int32

In [7]:
obj2.set_by_index('d', 6)
pd.Series(obj2.values, obj2.index)

d    6
b    7
a   -5
c    3
dtype: int32

In [8]:
tmp = obj2[obj2 > 0]
pd.Series(tmp.values, tmp.index)

d    6
b    7
c    3
dtype: int32

In [9]:
tmp = obj2 * 2
pd.Series(tmp.values, tmp.index)

d    12
b    14
a   -10
c     6
dtype: int32

In [10]:
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon':16000, 'Utah': 5000}
obj3 = spd.Series(sdata)
pd.Series(obj3.values, obj3.index)

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int32

In [11]:
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = spd.Series(sdata, index=states)
pd.Series(obj4.values, obj4.index)

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [12]:
tmp = obj4.isnull()
pd.Series(tmp.values, tmp.index)

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [13]:
# Addition in pandas may produce unexpected results...
# Therefore, we simply add two Series elementwise by row position and keep the index of the left operand.
a = spd.Series([1, 2, 3], [1, 1, 2])
b = spd.Series([1, 2, 3], [2, 2, 2])
print(a + b)

      1      2
      1      4
      2      6
dtype: int32


## 2 DataFrame

In [14]:
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'], 
        'year': [2000, 2001, 2002, 2001, 2002, 2003], 
        'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}

frame = spd.DataFrame(data)

pd.DataFrame(frame.dict, frame.index)

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [15]:
frame = spd.DataFrame(data, columns=['year', 'state', 'pop'])
pd.DataFrame(frame.dict, frame.index)

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9
5,2003,Nevada,3.2


In [16]:
frame2 = spd.DataFrame(data, columns=['year', 'state', 'pop', 'debt'], 
                      index=['one', 'two', 'three', 'four', 'five', 'six'])
pd.DataFrame(frame2.dict, frame2.index)

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,
six,2003,Nevada,3.2,


In [17]:
frame2.columns

['year', 'state', 'pop', 'debt']

In [18]:
print(frame2[['state']])

    one   Ohio
    two   Ohio
  three   Ohio
   four Nevada
   five Nevada
    six Nevada
dtype: <U6


In [19]:
tmp = frame2[frame2.index == 'three']
pd.DataFrame(tmp.dict, tmp.index)

Unnamed: 0,year,state,pop,debt
three,2002,Ohio,3.6,


In [20]:
frame2[['debt']] = 16.5
pd.DataFrame(frame2.dict, frame2.index)

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,16.5
two,2001,Ohio,1.7,16.5
three,2002,Ohio,3.6,16.5
four,2001,Nevada,2.4,16.5
five,2002,Nevada,2.9,16.5
six,2003,Nevada,3.2,16.5


In [21]:
frame2[['debt']] = np.arange(6.)
pd.DataFrame(frame2.dict, frame2.index)

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,0.0
two,2001,Ohio,1.7,1.0
three,2002,Ohio,3.6,2.0
four,2001,Nevada,2.4,3.0
five,2002,Nevada,2.9,4.0
six,2003,Nevada,3.2,5.0


In [22]:
val = spd.Series([-1.2, -1.5, -1.7], index=['four', 'two', 'five'])
frame2[['debt']].set_by_index(val.index, val.values)
pd.DataFrame(frame2.dict, frame2.index)

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,0.0
two,2001,Ohio,1.7,-1.5
three,2002,Ohio,3.6,2.0
four,2001,Nevada,2.4,-1.2
five,2002,Nevada,2.9,-1.7
six,2003,Nevada,3.2,5.0


In [23]:
frame2[['eastern']] = frame2[['state']] == 'Ohio'
pd.DataFrame(frame2.dict, frame2.index)

Unnamed: 0,year,state,pop,debt,eastern
one,2000,Ohio,1.5,0.0,1.0
two,2001,Ohio,1.7,-1.5,1.0
three,2002,Ohio,3.6,2.0,1.0
four,2001,Nevada,2.4,-1.2,0.0
five,2002,Nevada,2.9,-1.7,0.0
six,2003,Nevada,3.2,5.0,0.0


In [24]:
del frame2[['eastern']]
pd.DataFrame(frame2.dict, frame2.index)

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,0.0
two,2001,Ohio,1.7,-1.5
three,2002,Ohio,3.6,2.0
four,2001,Nevada,2.4,-1.2
five,2002,Nevada,2.9,-1.7
six,2003,Nevada,3.2,5.0


In [25]:
frame2.columns

['year', 'state', 'pop', 'debt']

In [26]:
tmp = frame2.T
pd.DataFrame(tmp.dict, tmp.index)

Unnamed: 0,one,two,three,four,five,six
year,2000,2001,2002,2001,2002,2003
state,Ohio,Ohio,Ohio,Nevada,Nevada,Nevada
pop,1.5,1.7,3.6,2.4,2.9,3.2
debt,0.0,-1.5,2.0,-1.2,-1.7,5.0
