In [42]:
import pandas as pd
import numpy as np

In [2]:
# Plain Python list of integers; it is turned into a Series in the next cell.
data = [10, 20, 30, 40, 50]
data

[10, 20, 30, 40, 50]

In [4]:
# Wrap the list in a Series. No index is supplied, so the displayed labels are
# the default integers 0..4.
myfirstseries = pd.Series(data=data)
myfirstseries

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [6]:
# Series values are not limited to numbers: this list holds strings.
data = ['apple', 'orange', 'mango', 'banana', 'grapes']
data

['apple', 'orange', 'mango', 'banana', 'grapes']

In [7]:
# A Series built from strings is displayed with dtype `object`.
mysecondseries = pd.Series(data=data)
mysecondseries

0     apple
1    orange
2     mango
3    banana
4    grapes
dtype: object

In [8]:
# Values and their labels are supplied as two parallel lists; passing them by
# keyword makes it explicit which list is the data and which is the index.
data = [300, 200, 100, 50, 10]
index = ['Johor', 'Kedah', 'Kelantan', 'Melaka', 'Pahang']
mypopulationseries = pd.Series(data=data, index=index)
mypopulationseries

Johor       300
Kedah       200
Kelantan    100
Melaka       50
Pahang       10
dtype: int64

In [10]:
# A dict maps straight onto a Series: keys become the index labels and the
# values become the data.
data = {'Johor': 300, 'Kedah': 200, 'Kelantan': 100, 'Melaka': 50, 'Pahang': 10}
mypopulationseries = pd.Series(data=data)
mypopulationseries

Johor       300
Kedah       200
Kelantan    100
Melaka       50
Pahang       10
dtype: int64

In [12]:
# Creating a Series from a dictionary plus an explicit index:
# only the labels listed in `index` are kept (here Kedah and Kelantan).

data = {'Johor': 300, 'Kedah': 200, 'Kelantan': 100, 'Melaka': 50, 'Pahang': 10}
index = ['Kedah', 'Kelantan']
mypopulationseries = pd.Series(data=data, index=index)
mypopulationseries

Kedah       200
Kelantan    100
dtype: int64

### Selection and Indexing

In [13]:
# Redisplay the Series of fruit names before indexing into it.
mysecondseries

0     apple
1    orange
2     mango
3    banana
4    grapes
dtype: object

In [14]:
# A Series can be sliced like a NumPy array: start:stop selects by position,
# and the stop position is excluded (entries 0, 1 and 2 here).

mysecondseries[0:3]

0     apple
1    orange
2     mango
dtype: object

In [15]:
# A step of 2 keeps every other entry, starting from the first.
mysecondseries[::2]

0     apple
2     mango
4    grapes
dtype: object

In [20]:
# Rebuild the Series with all five states so the selection examples have the
# complete set of labels to work with.
data = dict(Johor=300, Kedah=200, Kelantan=100, Melaka=50, Pahang=10)
mypopulationseries = pd.Series(data=data)
mypopulationseries

Johor       300
Kedah       200
Kelantan    100
Melaka       50
Pahang       10
dtype: int64

In [23]:
# A list of labels inside [] returns a Series holding just those entries.
mypopulationseries[['Johor','Kedah']]

Johor    300
Kedah    200
dtype: int64

In [24]:
# A list of integers picks several entries at once; with this default
# 0..4 index the labels and the positions coincide.
mysecondseries[[0, 2, 4]]

0     apple
2     mango
4    grapes
dtype: object

In [26]:
# .loc selects entries of a Series by index label.

mysecondseries.loc[[0, 2, 4]]

0     apple
2     mango
4    grapes
dtype: object

In [27]:
# Label-based selection works the same way with string labels.
mypopulationseries.loc[['Johor', 'Kedah', 'Melaka']]

Johor     300
Kedah     200
Melaka     50
dtype: int64

In [28]:
# Does mypopulationseries still support integer positions even though its
# labels are strings? Yes: .iloc indexes by position, so 0 is the first entry.

mypopulationseries.iloc[0]

np.int64(300)

In [29]:
# Positional slice: entries at positions 0 through 3 (the stop position is excluded).
mypopulationseries.iloc[0:4]

Johor       300
Kedah       200
Kelantan    100
Melaka       50
dtype: int64

In [30]:
# A list of integer positions picks non-contiguous entries.
mypopulationseries.iloc[[0,2,4]]

Johor       300
Kelantan    100
Pahang       10
dtype: int64

In [34]:
# Convert the values to 32-bit integers; the result of astype is bound back
# to the same name.
mypopulationseries = mypopulationseries.astype('int32')
mypopulationseries

Johor       300
Kedah       200
Kelantan    100
Melaka       50
Pahang       10
dtype: int32

In [36]:
# dtype of the values stored in the Series.
mypopulationseries.dtype

dtype('int32')

In [35]:
# Python type of the container itself, as opposed to the dtype of its values.
type(mypopulationseries)

pandas.core.series.Series

In [38]:
# Convert the Series values to a plain Python list; the index labels are not
# part of the result.
mypopulationlist = mypopulationseries.tolist()
mypopulationlist

[300, 200, 100, 50, 10]

### Methods and Universal Functions

In [39]:
# Total of all values, computed with the Series' own method.
mypopulationseries.sum()

np.int64(660)

In [40]:
# Arithmetic mean of the values.
mypopulationseries.mean()

np.float64(132.0)

In [43]:
# NumPy reduction functions accept a Series directly; this gives the same
# total as mypopulationseries.sum().
np.sum(mypopulationseries)

np.int64(660)

### Arithmetic Operations

In [44]:
# Figures labelled 2023: five states, Melaka included.
data = {'Johor': 300, 'Kedah': 200, 'Kelantan': 100, 'Melaka': 50, 'Pahang': 10}
mypopulationseries2023 = pd.Series(data=data)
mypopulationseries2023

Johor       300
Kedah       200
Kelantan    100
Melaka       50
Pahang       10
dtype: int64

In [45]:
# Figures labelled 2024: same as 2023 except that the fourth entry is Penang
# (900) instead of Melaka, so the two Series do not share all their labels.
data = {'Johor': 300, 'Kedah': 200, 'Kelantan': 100, 'Penang': 900, 'Pahang': 10}
mypopulationseries2024 = pd.Series(data=data)
mypopulationseries2024

Johor       300
Kedah       200
Kelantan    100
Penang      900
Pahang       10
dtype: int64

In [48]:
# Element-wise addition; the method form is mypopulationseries2023.add(mypopulationseries2024).
# The operands are aligned on their index labels, so labels present in only one
# Series (Melaka, Penang) come out as NaN and the result is float64.
mypopulationseries2023 + mypopulationseries2024

Johor       600.0
Kedah       400.0
Kelantan    200.0
Melaka        NaN
Pahang       20.0
Penang        NaN
dtype: float64

In [49]:
# Element-wise subtraction; the method form is mypopulationseries2023.sub(mypopulationseries2024).
# Labels found in only one of the two Series (Melaka, Penang) produce NaN.
mypopulationseries2023 - mypopulationseries2024

Johor       0.0
Kedah       0.0
Kelantan    0.0
Melaka      NaN
Pahang      0.0
Penang      NaN
dtype: float64

In [51]:
# Element-wise multiplication via the method form; the operator form is
# mypopulationseries2023 * mypopulationseries2024.
mypopulationseries2023.mul(mypopulationseries2024)

Johor       90000.0
Kedah       40000.0
Kelantan    10000.0
Melaka          NaN
Pahang        100.0
Penang          NaN
dtype: float64

In [52]:
# Element-wise division via the method form; the operator form is
# mypopulationseries2023 / mypopulationseries2024.
mypopulationseries2023.div(mypopulationseries2024)

Johor       1.0
Kedah       1.0
Kelantan    1.0
Melaka      NaN
Pahang      1.0
Penang      NaN
dtype: float64

### Functions

In [53]:
# Product of all elements: 1 * 2 * 3.
mydata = pd.Series([1, 2, 3])
mydata.prod()

np.int64(6)

In [54]:
# np.std defaults to ddof=0, i.e. the population standard deviation (divide by n).
# mypopulationseries.std() would give a larger value, because pandas defaults
# to ddof=1 (the sample standard deviation).
np.std(mypopulationseries)

np.float64(105.33755265810954)

In [55]:
# np.var defaults to ddof=0, i.e. the population variance (divide by n).
# mypopulationseries.var() would give a larger value, because pandas defaults
# to ddof=1 (the sample variance).
np.var(mypopulationseries)

np.float64(11096.0)

In [58]:
# Covariance between two short numeric Series.
# NOTE(review): this rebinds myfirstseries and mysecondseries, which held
# different data in earlier cells; re-running those cells afterwards will
# show these new values instead.

myfirstseries = pd.Series([2, 4, 6])
mysecondseries = pd.Series([1, 3, 5])
myfirstseries.cov(mysecondseries)

np.float64(4.0)

In [61]:
# Correlation coefficient between the two Series, ranging from -1 to 1:
#   -1 -> perfect negative linear relationship
#    1 -> perfect positive linear relationship
#    0 -> no linear relationship

myfirstseries.corr(mysecondseries)

np.float64(1.0)

In [69]:
# Sample with repeated values (4 occurs four times), used for the counting examples.
mythirdseries = pd.Series([2, 4, 6, 4, 8, 10, 4, 4, 12])
mythirdseries

0     2
1     4
2     6
3     4
4     8
5    10
6     4
7     4
8    12
dtype: int64

In [70]:
mythirdseries.size       # total number of elements, missing values included

9

In [71]:
mythirdseries.count()    # number of non-null values only

np.int64(9)

In [72]:
# Frequency of each distinct value, most frequent first.
mythirdseries.value_counts()

4     4
2     1
6     1
8     1
10    1
12    1
Name: count, dtype: int64