## Creating a Pandas Series

In [2]:
import numpy as np
import pandas as pd

## Creating a Pandas Series with Basic Format

In [4]:
pd.Series([2,3,6,5])

0    2
1    3
2    6
3    5
dtype: int64

In [5]:
# Keep the Series in a variable so the following cells can inspect it.
ser = pd.Series(data=[2, 3, 6, 5])

In [6]:
ser

0    2
1    3
2    6
3    5
dtype: int64

## Basic Attributes of Series

In [7]:
type(ser)

pandas.core.series.Series

In [8]:
ser.dtype

dtype('int64')

In [9]:
ser.size

4

In [10]:
ser.ndim

1

In [11]:
ser.values

array([2, 3, 6, 5], dtype=int64)

In [13]:
# .values is a NumPy array, so it can be iterated element by element.
for value in ser.values:
    print(value)

2
3
6
5


In [14]:
# Materialise the underlying NumPy array as a plain Python list.
list(ser.values)

[2, 3, 6, 5]

In [15]:
ser

0    2
1    3
2    6
3    5
dtype: int64

In [16]:
ser.head(3)  # returns top three values

0    2
1    3
2    6
dtype: int64

In [17]:
# Six consecutive integers, so head()/tail() (default 5 rows) leave one row out.
ser = pd.Series(range(1, 7))

In [18]:
ser.head()  # default is 5

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [19]:
ser.head(6)

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [20]:
ser.tail()  # default 5 is here also

1    2
2    3
3    4
4    5
5    6
dtype: int64

In [21]:
ser.tail(2)

4    5
5    6
dtype: int64

In [24]:
# A string is an iterable of characters, so list() splits it letter by letter.
list("clarusway")

['c', 'l', 'a', 'r', 'u', 's', 'w', 'a', 'y']

In [23]:
# One Series element per character of the string.
pd.Series(list("clarusway"))

0    c
1    l
2    a
3    r
4    u
5    s
6    w
7    a
8    y
dtype: object

## Creating Pandas Series by Using a 'list', numpy array or 'dictionary'

In [100]:
# Shared sample inputs for the constructor examples that follow.
labels = list("abc")              # index labels: 'a', 'b', 'c'
my_list = [10, 20, 30]            # values as a plain Python list
arr = np.array(my_list)           # the same values as a NumPy array
d = dict(zip(labels, my_list))    # label -> value mapping

## Using List

In [26]:
pd.Series(labels)

0    a
1    b
2    c
dtype: object

In [27]:
pd.Series(my_list)

0    10
1    20
2    30
dtype: int64

## Using NumPy Arrays

In [28]:
pd.Series(arr)

0    10
1    20
2    30
dtype: int32

In [30]:
# Pair each array value with the label at the same position.
pd.Series(arr, index=labels)

a    10
b    20
c    30
dtype: int32

## Using Dictionaries

In [31]:
d

{'a': 10, 'b': 20, 'c': 30}

In [32]:
pd.Series(d)

a    10
b    20
c    30
dtype: int64

In [33]:
# The index selects keys from the dict: only "c" exists in d, so "q" and "y"
# come back as NaN and the result is stored as float64.
pd.Series(d, index=["q", "c", "y"])

q     NaN
c    30.0
y     NaN
dtype: float64

## Data in a Series
A pandas Series can hold a variety of object types:

In [None]:
# {} or ()

In [36]:
set1 = {1,2,3,6}

In [37]:
# A set has no defined order, so pandas refuses to build a Series from it.
# The TypeError is the point of this demonstration; catch and print it so the
# notebook still completes under "Restart Kernel -> Run All".
try:
    pd.Series(set1)
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: 'set' type is unordered

In [38]:
pd.Series([sum, print, len])

0      <built-in function sum>
1    <built-in function print>
2      <built-in function len>
dtype: object

In [40]:
mix_data = ["Aziz", 2, True]
pd.Series(mix_data)

0    Aziz
1       2
2    True
dtype: object

## Indexing Pandas Series
The key to using a Series is understanding its index. pandas uses the index labels (names or numbers) to look up information quickly.

Let's see some examples of how to grab information from a Series. Let us create two Series, ser1 and ser2:

In [41]:
# Country label -> value; the dict's insertion order becomes the index order.
ser1 = pd.Series({"USA": 1, "Germany": 2, "USSR": 3, "Japan": 4})

In [42]:
# Same layout as ser1, but with "Italy" in place of "USSR".
ser2 = pd.Series({"USA": 1, "Germany": 2, "Italy": 5, "Japan": 4})

In [43]:
ser1

USA        1
Germany    2
USSR       3
Japan      4
dtype: int64

In [44]:
ser2

USA        1
Germany    2
Italy      5
Japan      4
dtype: int64

In [45]:
ser1["Germany"]

2

In [46]:
ser1 + ser2

Germany    4.0
Italy      NaN
Japan      8.0
USA        2.0
USSR       NaN
dtype: float64

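Operations between Series are aligned on the index: values are combined label by label, and a label that exists in only one of the Series (here `Italy` and `USSR`) yields `NaN`, which is why the result is `float64`.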

## Indexing Examples

In [101]:
a = np.array([1,2,33,444,75])
panser = pd.Series(a)
panser

0      1
1      2
2     33
3    444
4     75
dtype: int32

In [53]:
panser[2]

33

In [54]:
panser[0:1]

0    1
dtype: int32

## pandas_series[index] | pandas_series[[indices, indices...]]

In [102]:
panser = pd.Series([121, 200, 150, 99], index = ["terry", "micheal", "orion", "jason"])
panser

terry      121
micheal    200
orion      150
jason       99
dtype: int64

In [57]:
panser["terry"]

121

In [58]:
# Positional lookup on a string-labelled Series: use .iloc so the integer is
# unambiguously a position. Plain [] with an integer on a non-integer index is
# a deprecated fallback in pandas 2.x.
panser.iloc[0]

121

In [59]:
# Third element by position; .iloc avoids the deprecated integer fallback of []
# on a string-labelled index.
panser.iloc[2]

150

In [64]:
index1 = ["terry", "micheal", "jason"]

In [65]:
panser[index1]

terry      121
micheal    200
jason       99
dtype: int64

In [70]:
# Label-based slice: unlike positional slicing, the end label is included.
panser.loc["terry":"orion"]

terry      121
micheal    200
orion      150
dtype: int64

## Several Selecting Attributes

In [72]:
panser

terry      121
micheal    200
orion      150
jason       99
dtype: int64

In [68]:
panser.keys()

Index(['terry', 'micheal', 'orion', 'jason'], dtype='object')

In [69]:
panser.index

Index(['terry', 'micheal', 'orion', 'jason'], dtype='object')

In [71]:
panser.values

array([121, 200, 150,  99], dtype=int64)

In [75]:
# No parentheses: this evaluates to the bound method object itself and does not
# call it, so the output is the method's repr rather than (label, value) pairs.
panser.items

<bound method Series.items of terry      121
micheal    200
orion      150
jason       99
dtype: int64>

In [76]:
list(panser.items())

[('terry', 121), ('micheal', 200), ('orion', 150), ('jason', 99)]

In [103]:
for i, j in panser.items():
    print(f"{i} is key and {j} is value")

terry is key and 121 is value
micheal is key and 200 is value
orion is key and 150 is value
jason is key and 99 is value


In [81]:
panser

terry      121
micheal    200
orion      150
jason       99
dtype: int64

In [82]:
"terry" in panser  # `in` checks the index labels (not the values) and returns a single bool

True

In [83]:
"bob" in panser

False

In [85]:
121 in panser.values

True

In [86]:
55 in panser.values

False

## Broadcasting value inside the series

In [104]:
panser["terry"] = 90
panser

terry       90
micheal    200
orion      150
jason       99
dtype: int64

In [88]:
panser < 1000

terry      True
micheal    True
orion      True
jason      True
dtype: bool

In [89]:
panser == 100

terry      False
micheal    False
orion      False
jason      False
dtype: bool

In [90]:
panser == 90

terry       True
micheal    False
orion      False
jason      False
dtype: bool

In [106]:
panser[panser <= 150]

terry     90
orion    150
jason     99
dtype: int64

## The End of the Session - 3

# DATAFRAMES

In [1]:
data = [1,2,39,67,90]
data

[1, 2, 39, 67, 90]

In [3]:
pd.DataFrame(data, columns=["column"])

Unnamed: 0,column
0,1
1,2
2,39
3,67
4,90


In [6]:
# Integers 1..9 arranged in three rows; -1 lets NumPy infer the column count (3).
data2 = np.arange(1, 10).reshape(3, -1)

In [8]:
pd.DataFrame(data2, columns=["var1","var2","var3"])

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [10]:
# Integers 1..20 laid out in two columns; -1 lets NumPy infer the row count (10).
data3 = np.arange(1, 21).reshape(-1, 2)
data3

array([[ 1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8],
       [ 9, 10],
       [11, 12],
       [13, 14],
       [15, 16],
       [17, 18],
       [19, 20]])

In [17]:
# Preview the array as a DataFrame without rebinding `data3`: the name keeps
# referring to the NumPy array, so it never changes type between cells and
# this cell gives the same result however often it is re-run.
pd.DataFrame(data3, columns=["var1", "var2"])

Unnamed: 0,var1,var2
0,1,2
1,3,4
2,5,6
3,7,8
4,9,10
5,11,12
6,13,14
7,15,16
8,17,18
9,19,20


In [13]:
df3 = pd.DataFrame(data3, columns=["var1","var2"])

In [15]:
df3.head(10)

Unnamed: 0,var1,var2
0,1,2
1,3,4
2,5,6
3,7,8
4,9,10
5,11,12
6,13,14
7,15,16
8,17,18
9,19,20


In [31]:
df3.tail(3)

Unnamed: 0,var1,var2
7,15,16
8,17,18
9,19,20


In [19]:
df3.columns

Index(['var1', 'var2'], dtype='object')

In [20]:
# Iterating over .columns yields the column labels one at a time.
for column_name in df3.columns:
    print(column_name)

var1
var2


In [21]:
# Column labels as a plain Python list.
list(df3.columns)

['var1', 'var2']

In [34]:
df3

Unnamed: 0,var1,var2
0,1,2
1,3,4
2,5,6
3,7,8
4,9,10
5,11,12
6,13,14
7,15,16
8,17,18
9,19,20


In [36]:
df3.columns = ["new1","new2"]
df3

Unnamed: 0,new1,new2
0,1,2
1,3,4
2,5,6
3,7,8
4,9,10
5,11,12
6,13,14
7,15,16
8,17,18
9,19,20


In [24]:
type(df3)

pandas.core.frame.DataFrame

In [25]:
df3.shape

(10, 2)

In [26]:
df3.ndim

2

In [None]:
# 1 dim --> vector
# 2 dim --> matrix
# 3 dim --> tensor