In [1]:
import pandas as pd
import numpy as np

In [2]:
# Raw integer values (10, 20, ..., 50) used to build the first Series.
data = list(range(10, 60, 10))
data

[10, 20, 30, 40, 50]

In [3]:
# Build a Series straight from the list. No index is supplied, so pandas
# assigns the default integer labels 0..n-1. These axis labels are what
# pandas calls the index.
myfirstseries = pd.Series(data=data)
myfirstseries

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [4]:
# Raw string values used to build the second Series.
data = [
    "apple",
    "orange",
    "mango",
    "banana",
    "grapes",
]
data

['apple', 'orange', 'mango', 'banana', 'grapes']

In [5]:
# Same construction as before, this time from a list of strings. The index
# is again the default 0..n-1 because none is passed; the values are stored
# with dtype object.
mysecondseries = pd.Series(data=data)
mysecondseries

0     apple
1    orange
2     mango
3    banana
4    grapes
dtype: object

In [6]:
# Build a Series from two parallel lists: one holds the values and the
# other holds the labels to use as the index (same length, same order).
data = [3_794_000, 2_194_100, 1_928_800, 937_500, 1_684_600]
index = ["Johor", "Kedah", "Kelantan", "Malacca", "Pahang"]
mypopulationseries = pd.Series(data=data, index=index)
mypopulationseries

Johor       3794000
Kedah       2194100
Kelantan    1928800
Malacca      937500
Pahang      1684600
dtype: int64

In [7]:
# Build a Series from a dictionary: the keys become the index labels and
# the values become the data, in insertion order.
data = {
    "Johor": 3_794_000,
    "Kedah": 2_194_100,
    "Kelantan": 1_928_800,
    "Malacca": 937_500,
    "Pahang": 1_684_600,
}
mypopulationseries = pd.Series(data=data)
mypopulationseries

Johor       3794000
Kedah       2194100
Kelantan    1928800
Malacca      937500
Pahang      1684600
dtype: int64

In [8]:
# Build a Series from a dictionary plus an explicit list of labels.
# Only the dictionary entries whose keys appear in `index` are kept,
# in the order given by `index`.
data = {
    "Johor": 3_794_000,
    "Kedah": 2_194_100,
    "Kelantan": 1_928_800,
    "Malacca": 937_500,
    "Pahang": 1_684_600,
}
index = ["Kedah", "Kelantan"]
mypopulationseries = pd.Series(data=data, index=index)
mypopulationseries

Kedah       2194100
Kelantan    1928800
dtype: int64

### Selection and Indexing

In [9]:
# Re-display the fruit Series (default integer index) that the selection
# examples below operate on.
mysecondseries

0     apple
1    orange
2     mango
3    banana
4    grapes
dtype: object

In [10]:
# Square brackets with a single integer look the value up by index LABEL.
# It resembles list / NumPy indexing here only because the default index
# labels happen to be 0, 1, 2, ...
# A negative number such as -1 is not one of those labels, so it does not
# count from the end; use .iloc[-1] for positional access instead.
mysecondseries[0]

'apple'

In [11]:
# Slicing with start:stop is allowed as well; like a list slice, the stop
# value is excluded, so this returns the first three elements.
mysecondseries[0:3]

0     apple
1    orange
2     mango
dtype: object

In [12]:
# Slice with only a step:
# - no start given, so it begins at the first element
# - no stop given, so it runs through the last element
# - the third value is the step, so every second element is taken
mysecondseries[::2]

0     apple
2     mango
4    grapes
dtype: object

In [13]:
# Rebuild the full five-state population Series (the previous definition
# kept only two states) so the label-based examples below have all rows.
data = {
    "Johor": 3_794_000,
    "Kedah": 2_194_100,
    "Kelantan": 1_928_800,
    "Malacca": 937_500,
    "Pahang": 1_684_600,
}
mypopulationseries = pd.Series(data=data)
mypopulationseries

Johor       3794000
Kedah       2194100
Kelantan    1928800
Malacca      937500
Pahang      1684600
dtype: int64

In [14]:
# A Series can be read like a dictionary: passing one index label in
# square brackets returns the single value stored under that label.
mypopulationseries['Johor']

np.int64(3794000)

In [15]:
# Unlike a plain dictionary, a Series also accepts a LIST of labels and
# returns a new Series containing just those entries.
mypopulationseries[['Johor', 'Kedah']]

Johor    3794000
Kedah    2194100
dtype: int64

In [16]:
# The list form works with the default integer index too: 0, 2 and 4 are
# the index labels of the elements being selected.
mysecondseries[[0, 2, 4]]

0     apple
2     mango
4    grapes
dtype: object

In [17]:
# .loc is the explicit label-based indexer. Here the labels are the
# automatically generated integers, so 0, 2 and 4 select those rows.
mysecondseries.loc[[0, 2, 4]]

0     apple
2     mango
4    grapes
dtype: object

In [18]:
# .loc works the same way with an index we supplied ourselves: pass the
# state names to pull out the matching rows.
mypopulationseries.loc[['Johor', 'Kedah', 'Malacca']]

Johor      3794000
Kedah      2194100
Malacca     937500
dtype: int64

In [19]:
# Does mypopulationseries still have a 0, 1, 2, ... index?
# No: once we pass our own labels, those labels ARE the index and no
# second integer index is stored. Every element still has a position
# (0 for the first, 1 for the second, ...), though, and the .iloc
# indexer selects by that position regardless of the labels.
#
# Equivalent label-based lookups for the same element:
#   mypopulationseries['Johor']
#   mypopulationseries.loc['Johor']
mypopulationseries.iloc[0]

np.int64(3794000)

In [20]:
# Positional slice: positions 0 through 3 (the stop position 4 is excluded).
mypopulationseries.iloc[0:4]

Johor       3794000
Kedah       2194100
Kelantan    1928800
Malacca      937500
dtype: int64

In [21]:
# A list of positions selects the first, third and fifth elements.
mypopulationseries.iloc[[0, 2, 4]]

Johor       3794000
Kelantan    1928800
Pahang      1684600
dtype: int64

In [22]:
# Convert the values from int64 to 32-bit integers and rebind the name, so
# every later cell that uses mypopulationseries sees the int32 version.
mypopulationseries = mypopulationseries.astype(np.int32)
mypopulationseries

Johor       3794000
Kedah       2194100
Kelantan    1928800
Malacca      937500
Pahang      1684600
dtype: int32

In [23]:
# The dtype attribute reports the element type of the Series values.
mypopulationseries.dtype

dtype('int32')

In [24]:
# type() reports the Python class of the object itself (a pandas Series),
# as opposed to .dtype, which describes the elements it holds.
type(mypopulationseries)

pandas.core.series.Series

In [25]:
# Copy the Series values into a plain Python list; the index labels are
# not part of the result.
mypopulationlist = mypopulationseries.to_list()
mypopulationlist

[3794000, 2194100, 1928800, 937500, 1684600]

### Arithmetic operations

In [26]:
# First operand for the arithmetic examples: five states, including Malacca.
data = {
    "Johor": 3_794_000,
    "Kedah": 2_194_100,
    "Kelantan": 1_928_800,
    "Malacca": 937_500,
    "Pahang": 1_684_600,
}
mypopulationseries2023 = pd.Series(data=data)
mypopulationseries2023

Johor       3794000
Kedah       2194100
Kelantan    1928800
Malacca      937500
Pahang      1684600
dtype: int64

In [27]:
# Second operand for the arithmetic examples: same as the 2023 Series
# except that Penang appears in place of Malacca.
data = {
    "Johor": 3_794_000,
    "Kedah": 2_194_100,
    "Kelantan": 1_928_800,
    "Penang": 1_774_400,
    "Pahang": 1_684_600,
}
mypopulationseries2024 = pd.Series(data=data)
mypopulationseries2024

Johor       3794000
Kedah       2194100
Kelantan    1928800
Penang      1774400
Pahang      1684600
dtype: int64

In [28]:
# Element-wise addition, matched up by index label rather than by position.
# The method form is mypopulationseries2023.add(mypopulationseries2024).
# A label that exists in only one of the two Series (Malacca, Penang) has
# no partner value, so its result is NaN and the output dtype is float64.
mypopulationseries2023 + mypopulationseries2024

Johor       7588000.0
Kedah       4388200.0
Kelantan    3857600.0
Malacca           NaN
Pahang      3369200.0
Penang            NaN
dtype: float64

In [29]:
# Element-wise subtraction aligned on index labels.
# The method form is mypopulationseries2023.sub(mypopulationseries2024).
mypopulationseries2023 - mypopulationseries2024

Johor       0.0
Kedah       0.0
Kelantan    0.0
Malacca     NaN
Pahang      0.0
Penang      NaN
dtype: float64

In [30]:
# Element-wise multiplication aligned on index labels.
# The method form is mypopulationseries2023.mul(mypopulationseries2024).
mypopulationseries2023 * mypopulationseries2024

Johor       1.439444e+13
Kedah       4.814075e+12
Kelantan    3.720269e+12
Malacca              NaN
Pahang      2.837877e+12
Penang               NaN
dtype: float64

In [31]:
# Element-wise division aligned on index labels.
# The method form is mypopulationseries2023.div(mypopulationseries2024).
mypopulationseries2023 / mypopulationseries2024

Johor       1.0
Kedah       1.0
Kelantan    1.0
Malacca     NaN
Pahang      1.0
Penang      NaN
dtype: float64

### Methods and Universal functions

In [32]:
# Statistics via Series methods: sum() adds up all the values.
mypopulationseries.sum()

np.int64(10539000)

In [33]:
# Statistics via NumPy functions: a Series can be passed straight to
# np.sum, which gives the same total as the sum() method.
np.sum(mypopulationseries)

np.int64(10539000)

In [34]:
# Statistics via Series methods: mean() is the arithmetic average.
mypopulationseries.mean()

np.float64(2107800.0)

In [35]:
# prod() multiplies all the elements of the Series together
# (1 * 2 * 3 for this example).
mydata = pd.Series(data=[1, 2, 3])
mydata.prod()

np.int64(6)

In [36]:
# Standard deviation through NumPy. np.std uses ddof=0 (population
# standard deviation, dividing by n), whereas mypopulationseries.std()
# defaults to ddof=1 (sample, dividing by n - 1), so the two give
# different numbers for the same data.
np.std(mypopulationseries)

np.float64(941430.7685645291)

In [37]:
# Variance through NumPy. As with np.std, np.var uses ddof=0 (population
# variance), while mypopulationseries.var() defaults to ddof=1 (sample).
np.var(mypopulationseries)

np.float64(886291892000.0)

In [38]:
# Covariance between two small numeric Series.
# NOTE: this rebinds myfirstseries and mysecondseries, which earlier cells
# defined with different data (integers 10..50 and fruit names).
myfirstseries = pd.Series(data=[2, 4, 6])
mysecondseries = pd.Series(data=[1, 3, 5])
myfirstseries.cov(other=mysecondseries)

np.float64(4.0)

In [39]:
# Correlation between the two Series. The coefficient always lies
# between -1 and 1:
#   -1    perfect negative correlation
#   -0.5  partial negative correlation
#    0    no linear relationship
#    0.5  partial positive correlation
#    1    perfect positive correlation
myfirstseries.corr(mysecondseries)

np.float64(1.0)

In [40]:
# Series with repeated values, used for the counting examples that follow.
mythirdseries = pd.Series(data=[2, 4, 6, 4, 8, 10, 4, 4, 12])
mythirdseries

0     2
1     4
2     6
3     4
4     8
5    10
6     4
7     4
8    12
dtype: int64

In [41]:
# size is an attribute (no parentheses): the total number of elements,
# missing values included.
mythirdseries.size  # how many values

9

In [42]:
# count() is a method: the number of non-null elements. It equals size
# here because this Series contains no missing values.
mythirdseries.count() # how many not null values

np.int64(9)

In [43]:
# value_counts() tallies how often each distinct value occurs and lists
# the most frequent value first.
mythirdseries.value_counts()

4     4
2     1
6     1
8     1
10    1
12    1
Name: count, dtype: int64