In [7]:
import pandas as pd
import numpy as np
ser = pd.Series([7,4,-1,3])

In [2]:
ser

0    7
1    4
2   -1
3    3
dtype: int64

In [3]:
ser.values

array([ 7,  4, -1,  3])

In [4]:
ser.index

RangeIndex(start=0, stop=4, step=1)

In [5]:
ser[1]

4

In [6]:
ser[[2,1,3]]

2   -1
1    4
3    3
dtype: int64

[2,1,3] interpreted as a list of indices

In [10]:
np.exp(pd.Series([1,1,1,1]))

0    2.718282
1    2.718282
2    2.718282
3    2.718282
dtype: float64

In [11]:
1 in ser  # ser.index just like the dict's key

True

Create a Series object from a Python dict 

In [12]:
d = {'name':"zjtprince","age":32,"profession":"engineer"}

In [13]:
obj = pd.Series(d)

In [14]:
obj

name          zjtprince
age                  32
profession     engineer
dtype: object

In [16]:
index = ['name','age','gender']
obj2 = pd.Series(d, index=index)


In [17]:
obj2

name      zjtprince
age              32
gender          NaN
dtype: object

In [18]:
obj+obj2

age                           64
gender                       NaN
name          zjtprincezjtprince
profession                   NaN
dtype: object

A useful Series feature for many applications  is that it automatically asigns by index label in arithmatic operations

In [21]:
obj.name='me'
# obj.index.name='info'
obj


info
name          zjtprince
age                  32
profession     engineer
Name: me, dtype: object

In [23]:
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
'year': [2000, 2001, 2002, 2001, 2002, 2003],
'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
frame = pd.DataFrame(data)
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


pd.DataFrame in jupyter notebook displayed as a more browser-frendly HTML table

In [24]:
frame.index=['one','two','three','four','five','six']

In [25]:
frame

Unnamed: 0,state,year,pop
one,Ohio,2000,1.5
two,Ohio,2001,1.7
three,Ohio,2002,3.6
four,Nevada,2001,2.4
five,Nevada,2002,2.9
six,Nevada,2003,3.2


In [26]:
frame['pop']

one      1.5
two      1.7
three    3.6
four     2.4
five     2.9
six      3.2
Name: pop, dtype: float64

The returned Series have the same index of the DataFrame , and their name has been appropriately set.

In [31]:
pop = frame['pop']

In [32]:
pop[:]=1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pop[:]=1


In [33]:
pop

one      1.0
two      1.0
three    1.0
four     1.0
five     1.0
six      1.0
Name: pop, dtype: float64

In [34]:
frame

Unnamed: 0,state,year,pop
one,Ohio,2000,1.0
two,Ohio,2001,1.0
three,Ohio,2002,1.0
four,Nevada,2001,1.0
five,Nevada,2002,1.0
six,Nevada,2003,1.0


In [35]:
frame.T

Unnamed: 0,one,two,three,four,five,six
state,Ohio,Ohio,Ohio,Nevada,Nevada,Nevada
year,2000,2001,2002,2001,2002,2003
pop,1,1,1,1,1,1


In [36]:
pop = {'Nevada': {2001: 2.4, 2002: 2.9},
       'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}

In [37]:
df3 = pd.DataFrame(pop)

In [38]:
df3

Unnamed: 0,Nevada,Ohio
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


In [39]:
df3.values

array([[2.4, 1.7],
       [2.9, 3.6],
       [nan, 1.5]])

In [42]:
type (df3.values
     )

numpy.ndarray

#reindex

In [43]:
s = pd.Series(['puple','yellow','blue'], index=[0,2,4])

In [44]:
s

0     puple
2    yellow
4      blue
dtype: object

In [45]:
s.reindex(range(6), method='ffill')

0     puple
1     puple
2    yellow
3    yellow
4      blue
5      blue
dtype: object

In [46]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)),

    index=['Ohio', 'Colorado', 'Utah', 'New York'],

    columns=['one', 'two', 'three', 'four'])

In [53]:
df = pd.DataFrame(data)
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [48]:
df['one']  # returned a Series

Ohio         0
Colorado     4
Utah         8
New York    12
Name: one, dtype: int64

In [51]:
df[['one']]  # returned a DataFrame

Unnamed: 0,one
Ohio,0
Colorado,4
Utah,8
New York,12


In [52]:
df[:2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [54]:
data['Ohio':'Utah'] 

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11


Slicing with labels behaves differently than normal Python siicing in that the end-point is inclusive

In [55]:
df[1:3]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11


In [60]:
data < 5

Unnamed: 0,one,two,three,four
Ohio,True,True,True,True
Colorado,True,False,False,False
Utah,False,False,False,False
New York,False,False,False,False


In [62]:
data.loc['Colorado',['one','two']]

one    4
two    5
Name: Colorado, dtype: int64

In [64]:
type(data.loc[['Colorado','Utah'],['one','two']])

pandas.core.frame.DataFrame

In [70]:
data.loc[:,:][data['two'] > 5]

Unnamed: 0,one,two,three,four
Utah,8,9,10,11
New York,12,13,14,15


Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15
