# Pandas Series: The Basics

In [5]:
import numpy as np

import pandas as pd

pd.__version__

'0.25.3'

### Creating a Pandas Series

#### From a Numpy Array

In [6]:
s1 = pd.Series(np.random.randint(0,100,5))

s1

0    44
1    18
2    12
3    34
4    53
dtype: int64

#### From a List

In [7]:
s2 = pd.Series([10, -12, 30, 40, 50])

s2

0    10
1   -12
2    30
3    40
4    50
dtype: int64

#### From a Dictionary

In [8]:
s3 = pd.Series({ 0 : 1, 1 : 23, 2 : 43, 3 : 89, 4 : 44 })

s3

0     1
1    23
2    43
3    89
4    44
dtype: int64

### Let's view all three Series side-by-side

In [9]:
# side_by_side function from Wes McKinney, author of Pandas

def side_by_side(*objs, **kwds):
    """Print the reprs of several objects next to each other.

    Each object's ``repr`` is split into lines and the resulting columns
    are joined horizontally with pandas' ``adjoin`` helper.

    Parameters
    ----------
    *objs
        Objects to display (e.g. several Series).
    **kwds
        Only ``space`` is read: the number of blank characters placed
        between adjacent columns (4 when not given).
    """
    from pandas.io.formats.printing import adjoin

    gap = kwds.get('space', 4)

    # One list of text lines per object; adjoin lays them out as columns.
    columns = []
    for obj in objs:
        columns.append(repr(obj).split('\n'))

    print(adjoin(gap, *columns))
    

In [10]:
# For NumPy arrays, np.c_[s1, s2, s3] (square brackets) stacks 1-D arrays as columns

In [10]:
side_by_side(s1, s2, s3)

0    44         0    10         0     1     
1    18         1   -12         1    23     
2    12         2    30         2    43     
3    34         3    40         3    89     
4    53         4    50         4    44     
dtype: int64    dtype: int64    dtype: int64


### Indexes can be more meaningful

In [15]:
s4 = pd.Series(np.random.randint(0,100,5), index=['tom', 'jane', 'harry', 'ramesh', 'rachel'])

s4

tom        2
jane      66
harry      6
ramesh    19
rachel    38
dtype: int64

Note: a Series holds one-dimensional (1-D) data only.

### Accessing Values

#### All Values

In [9]:
s4.values

array([63, 29, 39, 83, 57])

In [10]:
type(s4.values)

numpy.ndarray

#### All Indexes

In [11]:
s4.index

Index(['tom', 'jane', 'harry', 'ramesh', 'rachel'], dtype='object')

In [12]:
type(s4.index)

pandas.core.indexes.base.Index

#### Len, Size, Shape

In [13]:
# Series len, size, shape etc.

len(s4), s4.size, s4.shape

(5, 5, (5,))

In [18]:
s_temp = pd.Series([1, 10, -3, np.nan])

s_temp

0     1.0
1    10.0
2    -3.0
3     NaN
dtype: float64

In [19]:
len(s_temp), s_temp.size, s_temp.shape

(4, 4, (4,))

In [20]:
s_temp.count() # Number of non-NaN values (3 here, while len/size report 4)

3

#### Index Value tests

In [22]:
# checking if the index value exists 

print ('ramesh' in s4)

print ('jin' in s4)

True
False


#### Head and Tail

In [23]:
s4.head(2)

tom      2
jane    66
dtype: int64

In [24]:
s4.tail(3)

harry      6
ramesh    19
rachel    38
dtype: int64

#### Accessing value by Index

In [25]:
s4

tom        2
jane      66
harry      6
ramesh    19
rachel    38
dtype: int64

In [26]:
s4['ramesh']

19

In [27]:
s1

0    25
1    57
2    64
3    93
4    97
dtype: int64

In [28]:
s1[3]

93

#### Do not confuse Position-based and Label-based lookup

In [29]:
s5 = pd.Series(np.random.randint(1,100, 5), index=[45, 87, 48, 23, 43])

s5

45     6
87    70
48     2
23    98
43    98
dtype: int64

In [30]:
s5[45]  # 45 is an index label, not a position. Try s5[0] also: it raises KeyError because 0 is not a label.

6

#### Use .loc for Label based lookup


In [31]:
s5.loc[23]

98

In [None]:
# s5.loc[0]  # Raises KeyError: 0 is not one of the index labels

In [32]:
s5.loc[[23, 48, 45]]

23    98
48     2
45     6
dtype: int64

#### Use .iloc for Position based lookup

In [35]:
s5.iloc[0], s5.iloc[1]

(6, 70)

In [None]:
# s5.iloc[23]  # Raises IndexError: position 23 is out of bounds for a 5-element Series

In [36]:
s5.iloc[[0,2,4]]

45     6
48     2
43    98
dtype: int64

In [37]:
s5.iloc[1:3]

87    70
48     2
dtype: int64

#### .ix looked up by Label or by Position (deprecated; removed in pandas 1.0 — use .loc / .iloc instead)

In [38]:
s4

tom        2
jane      66
harry      6
ramesh    19
rachel    38
dtype: int64

In [39]:
# Label-based lookup with .loc; the .ix indexer is deprecated and was removed in pandas 1.0.
s4.loc['jane']


66

In [40]:
# A list of labels selects several entries; .loc replaces the deprecated .ix indexer.
s4.loc[['jane', 'rachel']]


jane      66
rachel    38
dtype: int64

In [41]:
# s4 has string labels, so .ix treated the integer 3 as a position.
# .iloc states that intent explicitly and replaces the deprecated .ix indexer.
s4.iloc[3]


19

In [42]:
# Positional lookup of several entries; .iloc replaces the deprecated .ix indexer.
s4.iloc[[1, 4]]


jane      66
rachel    38
dtype: int64

Generally, .loc and .iloc are preferred: they remove the ambiguity between label and position lookups, and they perform better.

### Enumeration

In [44]:
# Series.items() yields (index label, value) tuples. It replaces
# Series.iteritems(), which was removed in pandas 2.0.
for item in s4.items():
    print(item)

('tom', 2)
('jane', 66)
('harry', 6)
('ramesh', 19)
('rachel', 38)


### Naming

In [45]:
s4.name ='Random'
s4.index.name = 'MyIndex'

s4

MyIndex
tom        2
jane      66
harry      6
ramesh    19
rachel    38
Name: Random, dtype: int64